diff --git a/README.md b/README.md index 0eeacc3b..dc25da5d 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,7 @@ The intermediate representation will store the network structures as a protobuf - [MXNet](http://mxnet.incubator.apache.org/) - [Tensorflow](https://www.tensorflow.org/) (Experimental) - [Microsoft Cognitive Toolkit (CNTK)](http://www.microsoft.com/en-us/cognitive-toolkit) (Destination only) +- [PyTorch](http://pytorch.org/) (Destination only) #### Tested models @@ -39,20 +40,20 @@ The model conversion between current supported frameworks is tested on some **Im Models | Caffe | Keras | Tensorflow | CNTK | MXNet | PyTorch | :--------------------------------------------------:|:-----:|:-----:|:----------:|:----:|:-----:|:-------:| -[Inception V1](http://arxiv.org/abs/1409.4842v1) | √ | √ | √ | √ | √ -[Inception V3](http://arxiv.org/abs/1512.00567) | × | √ | √ | √ | √ +[Inception V1](http://arxiv.org/abs/1409.4842v1) | √ | √ | √ | √ | √ | x (No LRN) +[Inception V3](http://arxiv.org/abs/1512.00567) | × | √ | √ | √ | √ | √ [ResNet V1 50](https://arxiv.org/abs/1512.03385) | × | √ | √ | o | √ | √ -[ResNet V2 152](https://arxiv.org/abs/1603.05027) | × | √ | √ | √ | √ | +[ResNet V2 152](https://arxiv.org/abs/1603.05027) | × | √ | √ | √ | √ | √ [VGG 19](http://arxiv.org/abs/1409.1556.pdf) | √ | √ | √ | √ | √ | √ -[MobileNet_v1](https://arxiv.org/pdf/1704.04861.pdf)| × | √ | √ | × (No Relu6) | × -[Xception](https://arxiv.org/pdf/1610.02357.pdf) | × | √ | √ | × | × | -[SqueezeNet](https://arxiv.org/pdf/1602.07360) | | √ | √ | √ | √ | +[MobileNet_v1](https://arxiv.org/pdf/1704.04861.pdf)| × | √ | √ | × (No Relu6) | × | × +[Xception](https://arxiv.org/pdf/1610.02357.pdf) | × | √ | √ | × | × | × +[SqueezeNet](https://arxiv.org/pdf/1602.07360) | | √ | √ | √ | √ | × #### On-going frameworks - [Caffe2](https://caffe2.ai/) - [CoreML](https://developer.apple.com/documentation/coreml) -- [PyTorch](http://pytorch.org/) + #### Usage diff --git a/mmdnn/conversion/caffe/common_graph.py b/mmdnn/conversion/caffe/common_graph.py index 26e2cea6..7a62617d 100644 --- a/mmdnn/conversion/caffe/common_graph.py +++ b/mmdnn/conversion/caffe/common_graph.py @@ -1,3 +1,8 @@ +#---------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. 
+#---------------------------------------------------------------------------------------------- + from six import string_types as _string_types from mmdnn.conversion.caffe.errors import ConversionError from mmdnn.conversion.common.IR.graph_pb2 import GraphDef, NodeDef, TensorShape diff --git a/mmdnn/conversion/caffe/mapper.py b/mmdnn/conversion/caffe/mapper.py index f669e8a7..d66646a0 100644 --- a/mmdnn/conversion/caffe/mapper.py +++ b/mmdnn/conversion/caffe/mapper.py @@ -14,13 +14,13 @@ def get_handler_name(node_kind): class NodeMapper(object): - + @classmethod def _convert_output_shape(cls, kwargs, node): shape = TensorShape() dim = shape.dim.add() dim.size = -1 - + if len(node.output_shape) > 2: for i in node.output_shape[2:]: dim = shape.dim.add() @@ -31,22 +31,22 @@ def _convert_output_shape(cls, kwargs, node): dim = shape.dim.add() dim.size = node.output_shape[1] kwargs['_output_shapes'] = [shape] - + @classmethod def get_kernel_params(cls, node): - kwargs = {} + kwargs = {} if node.kernel_parameters.p_h > 0 or node.kernel_parameters.p_w > 0: - padding = [0, 0, node.kernel_parameters.p_h, node.kernel_parameters.p_h, node.kernel_parameters.p_w, node.kernel_parameters.p_w, 0, 0] + padding = [0, node.kernel_parameters.p_h, node.kernel_parameters.p_w, 0, 0, node.kernel_parameters.p_h, node.kernel_parameters.p_w, 0] elif node.kernel_parameters.s_h > 1 or node.kernel_parameters.s_w > 1: - padding = [0, 0, (node.kernel_parameters.s_h - 1) // 2, node.kernel_parameters.s_h // 2, (node.kernel_parameters.s_w - 1) // 2, node.kernel_parameters.s_w // 2, 0, 0] + padding = [0, (node.kernel_parameters.s_h - 1) // 2, (node.kernel_parameters.s_w - 1) // 2, 0, 0, node.kernel_parameters.s_h // 2, node.kernel_parameters.s_w // 2, 0] else: padding = None - - kwargs['padding'] = 'VALID' + + kwargs['auto_pad'] = 'VALID' kwargs['strides'] = [1, node.kernel_parameters.s_h, node.kernel_parameters.s_w, 1] cls._convert_output_shape(kwargs, node) - - return kwargs, {'paddings' : padding, 'mode' : 'CONSTANT', 'constant_values' : 0.0} + + return kwargs, {'pads' : padding, 'mode' : 'constant', 'constant_values' : 0.0} @classmethod @@ -60,7 +60,7 @@ def map_data(cls, node): dim.size = i dim = shape.dim.add() dim.size = node.output_shape.channels - + kwargs = {'shape': shape} # Ignore the dimension of batch size cls._convert_output_shape(kwargs, node) return Node.create('DataInput', **kwargs) @@ -74,16 +74,17 @@ def map_input(cls, node): def map_convolution(cls, node): kwargs, padding = cls.get_kernel_params(node) parent, _ = node.get_only_parent() - kwargs['filter'] = [node.kernel_parameters.k_h, node.kernel_parameters.k_w, parent.output_shape.channels, node.parameters.num_output] - kwargs['use_bias'] = node.parameters.bias_term + kwargs['kernel_shape'] = [node.kernel_parameters.k_h, node.kernel_parameters.k_w, parent.output_shape.channels, node.parameters.num_output] + kwargs['use_bias'] = node.parameters.bias_term group = node.parameters.group if group != 1: kwargs['group'] = group - - if padding['paddings'] != None: - return [Node.create('Pad', **padding), Node.create('Convolution', **kwargs)] + + if padding['pads'] != None: + return [Node.create('Pad', **padding), Node.create('Conv', **kwargs)] else: - return Node.create('Convolution', **kwargs) + kwargs['pads'] = [0] * 8 + return Node.create('Conv', **kwargs) @classmethod @@ -91,12 +92,12 @@ def map_deconvolution(cls, node): raise NotImplementedError() kwargs = cls.get_kernel_params(node) parent, _ = node.get_only_parent() - kwargs['filter'] = 
[node.kernel_parameters.k_h, node.kernel_parameters.k_w, parent.output_shape.channels, node.parameters.num_output] + kwargs['kernel_shape'] = [node.kernel_parameters.k_h, node.kernel_parameters.k_w, parent.output_shape.channels, node.parameters.num_output] group = node.parameters.group if group != 1: - kwargs['group'] = group + kwargs['group'] = group return Node.create('deconv', **kwargs) - + @classmethod def map_crop(cls, node): offset = node.parameters.offset @@ -105,13 +106,13 @@ def map_crop(cls, node): return Node.create('crop', **kwargs) else: return Node.create('crop') - + @classmethod def map_relu(cls, node): kwargs = {} cls._convert_output_shape(kwargs, node) return Node.create('Relu', **kwargs) - + @classmethod def map_pooling(cls, node): kwargs, padding = cls.get_kernel_params(node) @@ -122,21 +123,22 @@ def map_pooling(cls, node): else: # Stochastic pooling, for instance. raise ConversionError('Unsupported pooling type.') - kwargs['window_shape'] = [1, node.kernel_parameters.k_h, node.kernel_parameters.k_w, 1] + kwargs['kernel_shape'] = [1, node.kernel_parameters.k_h, node.kernel_parameters.k_w, 1] cls._convert_output_shape(kwargs, node) - - if padding['paddings'] != None: + + if padding['pads'] != None: return [Node.create('Pad', **padding), Node.create('Pool', **kwargs)] else: + kwargs['pads'] = [0] * 8 return Node.create('Pool', **kwargs) - + @classmethod def _add_flatten_layer(cls, node): shape = TensorShape() dim = shape.dim.add() - dim.size = -1 - + dim.size = -1 + dim = shape.dim.add() dim.size = 1 for i in node.output_shape[1:]: @@ -149,8 +151,8 @@ def map_inner_product(cls, node): #TODO: Axis assert node.parameters.axis == 1 #TODO: Unbiased - kwargs = {'use_bias' : node.parameters.bias_term, 'units' : node.parameters.num_output} - + kwargs = {'use_bias' : node.parameters.bias_term, 'units' : node.parameters.num_output} + # check if need the Flatten layer parent, _ = node.get_only_parent() ret = [] @@ -158,11 +160,11 @@ def map_inner_product(cls, node): ret.append(cls._add_flatten_layer(parent)) ret.append(Node.create('FullyConnected', **kwargs)) return ret - + @classmethod def map_softmax(cls, node): return Node.create('Softmax') - + @classmethod def map_lrn(cls, node): params = node.parameters @@ -170,19 +172,19 @@ def map_lrn(cls, node): kwargs = {'size': int((params.local_size + 1) / 2), 'alpha': params.alpha, 'beta': params.beta, 'k' : params.k} cls._convert_output_shape(kwargs, node) return Node.create('LRN', **kwargs) - + @classmethod def map_concat(cls, node): kwargs = {'axis': (2, 3, 1, 0)[node.parameters.axis]} cls._convert_output_shape(kwargs, node) return Node.create('Concat', **kwargs) - + @classmethod def map_dropout(cls, node): kwargs = {'keep_prob': node.parameters.dropout_ratio} cls._convert_output_shape(kwargs, node) return Node.create('Dropout', **kwargs) - + @classmethod def map_batch_norm(cls, node): scale_offset = len(node.data) == 4 @@ -191,12 +193,12 @@ def map_batch_norm(cls, node): kwargs['epsilon'] = epsilon cls._convert_output_shape(kwargs, node) return Node.create('batch_normalization', **kwargs) - + @classmethod def map_eltwise(cls, node): operations = {0: 'mul', 1: 'sum', 2: 'max'} op_code = node.parameters.operation - try: + try: return Node.create(operations[op_code]) except KeyError: raise ConversionError('Unknown elementwise operation: {}'.format(op_code)) diff --git a/mmdnn/conversion/caffe/utils.py b/mmdnn/conversion/caffe/utils.py index c521be66..e0f9f88b 100644 --- a/mmdnn/conversion/caffe/utils.py +++ 
b/mmdnn/conversion/caffe/utils.py @@ -1,6 +1,10 @@ +#---------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +#---------------------------------------------------------------------------------------------- + import re - def get_lower_case(text): ''' Convert PascalCase name to words concatenated by '_'. @@ -19,13 +23,3 @@ def get_upper_case(text): def get_real_name(text): text = text.strip().split(':') return ''.join(text[:-1]) - -def listToStr(data): - ret = "" - first = True - for e in data: - if first == False: - ret += ", " - ret += str(e) - first = False - return ret \ No newline at end of file diff --git a/mmdnn/conversion/cntk/cntk_emitter.py b/mmdnn/conversion/cntk/cntk_emitter.py index 963e7bfb..e53497b2 100644 --- a/mmdnn/conversion/cntk/cntk_emitter.py +++ b/mmdnn/conversion/cntk/cntk_emitter.py @@ -9,6 +9,7 @@ import mmdnn.conversion.common.IR.graph_pb2 as graph_pb2 from mmdnn.conversion.common.IR.graph_pb2 import NodeDef, GraphDef, DataType from mmdnn.conversion.common.DataStructure.emitter import Emitter +from mmdnn.conversion.common.utils import * class CntkEmitter(Emitter): @@ -32,10 +33,10 @@ def __init__(self, model): else: network_path = model[0] self._load_weights(model[1]) - + self.IR_graph = IRGraph(network_path) super(CntkEmitter, self)._build() - + @property def header_code(self): @@ -49,7 +50,7 @@ def header_code(self): def load_weights(weight_file): if weight_file == None: return - + try: weights_dict = np.load(weight_file).item() except: @@ -78,7 +79,7 @@ def gen_code(self, phase = 'test'): func(current_node) else: print("CntkEmitter has not supported operator [%s]." 
% (node_type)) - self.emit_UNKNOWN(current_node) + self.emit_UNKNOWN(current_node) self.add_body(1, "return {}".format( ','.join([self.IR_graph.get_node(name).real_variable_name for name in self.IR_graph.output_layers]))) @@ -88,7 +89,7 @@ def gen_code(self, phase = 'test'): func = getattr(self, "_layer_" + i) func() - return self.body_codes + return self.body_code @staticmethod @@ -97,27 +98,27 @@ def _shapeToStr(shapes): return ', '.join('%s' % i for i in new_shape) - def emit_Convolution(self, IR_node): + def emit_Conv(self, IR_node): if self.weight_loaded: self.used_layers.add(IR_node.type) - dim = len(IR_node.layer.attr['strides'].list.i) - 2 - padding = [False] + [IR_node.layer.attr['padding'].s == b'SAME'] * dim + dim = len(IR_node.get_attr('strides')) - 2 + padding = [False] + [IR_node.get_attr('auto_pad') != 'VALID'] * dim self.add_body(1, "{:<15} = convolution({}, strides = ({},), auto_padding = [{}], name = '{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), - ', '.join('%s' % i for i in IR_node.layer.attr['strides'].list.i[1:-1]), + ', '.join('%s' % i for i in IR_node.get_attr('strides')[1:-1]), ', '.join('%s' % i for i in padding), IR_node.name)) - + else: self.add_body(1, "{:<15} = Convolution(name = '{}', num_filters = {}, filter_shape = ({}), strides = ({},), pad = {}, bias = {})({})\n".format( IR_node.variable_name, IR_node.name, - IR_node.layer.attr["filter"].list.i[-1], - ', '.join('%s' % i for i in IR_node.layer.attr["kernel_size"].list.i[-2]), + IR_node.get_attr('kernel_shape')[-1], + ', '.join('%s' % i for i in IR_node.layer.attr["kernel_shape"].list.i[-2]), ', '.join('%s' % i for i in IR_node.layer.attr['strides'].list.i[1:-1]), - IR_node.layer.attr['padding'].s == b'SAME', - IR_node.layer.attr['use_bias'].b, + IR_node.get_attr('auto_pad') != 'VALID', + IR_node.get_attr('use_bias'), self.parent_variable_name(IR_node))) @@ -128,22 +129,22 @@ def emit_Pool(self, IR_node): self.add_body(1, "{:<15} = global_pooling({}, '{}', name = '{}')".format( IR_node.variable_name, input_node, - IR_node.layer.attr['pooling_type'].s.decode('utf-8'), + IR_node.get_attr('pooling_type'), IR_node.name)) else: - for e in IR_node.IR_layer.attr["dilation_rate"].list.i: + for e in IR_node.get_attr('dilations', []): assert e == 1 - - pool_size = ', '.join('%s' % id for id in IR_node.layer.attr['window_shape'].list.i[1:-1]) - strides = ', '.join('%s' % id for id in IR_node.layer.attr['strides'].list.i[1:-1]) - padding = IR_node.layer.attr['padding'].s == b'SAME' - + + pool_size = ', '.join('%s' % id for id in IR_node.get_attr('kernel_shape')[1:-1]) + strides = ', '.join('%s' % id for id in IR_node.get_attr('strides')[1:-1]) + padding = IR_node.get_attr('auto_pad') != 'VALID' + if self.weight_loaded: self.used_layers.add(IR_node.type) self.add_body(1, "{:<15} = pooling({}, '{}', filter_shape = ({}), strides = ({}), pad = {}, name = '{}')".format( IR_node.variable_name, input_node, - IR_node.layer.attr['pooling_type'].s.decode('utf-8'), + IR_node.get_attr('pooling_type'), pool_size, strides, padding, @@ -157,11 +158,11 @@ def emit_UNKNOWN(self, IR_node): print(IR_node.IR_layer.name) - def emit_DataInput(self, IR_node): + def emit_DataInput(self, IR_node): shape_str = self._shapeToStr(IR_node.IR_layer.attr["shape"].shape) dtype_str = ", dtype = {}".format(self.dtype_map[IR_node.layer.attr['dtype'].type]) if 'dtype' in IR_node.layer.attr else "" - self.add_body(1, "{:<15} = cntk.input_variable(({},) {}, name = '{}')\n".format( - IR_node.variable_name, + self.add_body(1, 
"{:<15} = cntk.input_variable(({},) {}, name='{}')".format( + IR_node.variable_name, shape_str, dtype_str, IR_node.name)) @@ -169,17 +170,17 @@ def emit_DataInput(self, IR_node): def emit_Dropout(self, IR_node): parent = self.IR_graph.get_parent(IR_node.name, [0]) - if self.phase == 'train': + if self.phase == 'train': self.add_body(1, "{:<15} = Dropout({}, name = '{}')({})".format( IR_node.variable_name, - 1 - IR_node.IR_layer.attr["keep_prob"].f, + 1 - IR_node.get_attr('keep_prob'), IR_node.name, parent.real_variable_name)) else: IR_node.real_name = parent.real_name - def emit_FullyConnected(self, IR_node): + def emit_FullyConnected(self, IR_node): input_node = self.parent_variable_name(IR_node) if self.weight_loaded: self.used_layers.add(IR_node.type) @@ -197,18 +198,18 @@ def emit_FullyConnected(self, IR_node): input_node)) - def emit_Flatten(self, IR_node): + def emit_Flatten(self, IR_node): self.add_body(1, "{:<15} = ops.reshape({}, (-1,), name = '{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.name)) - def emit_Reshape(self, IR_node): + def emit_Reshape(self, IR_node): self.add_body(1, "{:<15} = cntk.reshape({}, shape = ({},) name = '{}')".format( IR_node.variable_name, self.IR_graph.get_node(IR_node.in_edges[0]).real_variable_name, - ', '.join('%s' % i for i in IR_node.layer.attr["shape"].list.i), + ', '.join('%s' % i for i in IR_node.get_attr('shape')), IR_node.name)) @@ -219,7 +220,7 @@ def _emit_activation(self, IR_node, op_name): IR_node.name, self.parent_variable_name(IR_node))) - + def emit_Tanh(self, IR_node): self._emit_activation(IR_node, 'ops.tanh') @@ -234,10 +235,10 @@ def emit_Softmax(self, IR_node): def emit_Sigmoid(self, IR_node): self._emit_activation(IR_node, 'ops.sigmoid') - + def emit_RNNs(self, IR_node, func): - assert False + assert False def emit_LSTM(self, IR_node): @@ -258,41 +259,39 @@ def emit_Add(self, IR_node): def emit_Concat(self, IR_node): inputs = ', '.join(self.IR_graph.get_node(i).real_variable_name for i in IR_node.in_edges) - self.add_body(1, "{:<15} = cntk.splice({}, axis = {}, name = '{}')".format( + self.add_body(1, "{:<15} = cntk.splice({}, axis={}, name='{}')".format( IR_node.variable_name, inputs, - IR_node.layer.attr['axis'].i - 1, + IR_node.get_attr('axis') - 1, IR_node.name)) - def emit_BatchNorm(self, IR_node): + def emit_BatchNorm(self, IR_node): self.used_layers.add(IR_node.type) self.add_body(1, "{:<15} = batch_normalization({}, epsilon = {}, name = '{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), - IR_node.layer.attr['epsilon'].f, + IR_node.get_attr('epsilon'), IR_node.name)) - - def emit_Pad(self, IR_node): - if IR_node.layer.attr['mode'].s == b'CONSTANT': - mode = 'mode = ops.CONSTANT_PAD, constant_value = {}'.format(IR_node.layer.attr['constant_values'].f) - elif IR_node.layer.attr['mode'].s == b'REFLECT': + + def emit_Pad(self, IR_node): + if IR_node.get_attr('mode') == 'constant': + mode = 'mode = ops.CONSTANT_PAD, constant_value = {}'.format(IR_node.get_attr('constant_values', 0.0)) + elif IR_node.get_attr('mode') == 'reflect': mode = 'mode = ops.REFLECT_PAD' - elif IR_node.layer.attr['mode'].s == b'SYMMETRIC': + elif IR_node.get_attr('mode') == 'SYMMETRIC': mode = 'mode = ops.SYMMETRIC_PAD' else: assert False - padding_str = ', '.join('(%s, %s)' % - (IR_node.layer.attr['paddings'].list.i[idx], - IR_node.layer.attr['paddings'].list.i[idx + 1]) - for idx in range(2, len(IR_node.layer.attr['paddings'].list.i), 2)) + padding = IR_node.get_attr('pads') + padding = 
convert_onnx_pad_to_tf(padding)[1:] - self.add_body(1, "{:<15} = ops.pad({}, pattern = [{}], {})".format( + self.add_body(1, "{:<15} = ops.pad({}, pattern = {}, {})".format( IR_node.variable_name, self.parent_variable_name(IR_node), - padding_str, + padding, mode)) @@ -304,19 +303,19 @@ def emit_ReduceMean(self, IR_node): self.add_body(1, "{:<15} = ops.reduce_mean({}, axis = ({}), name = '{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), - ', '.join('%s' % (i - 1) for i in IR_node.layer.attr['axes'].list.i), + ', '.join('%s' % (i - 1) for i in IR_node.get_attr('axes')), IR_node.name)) def emit_LRN(self, IR_node): self.used_layers.add(IR_node.type) - self.add_body(1, "{:<15} = lrn({}, k = 1, n = {}, alpha = {}, beta = {}, name = '{}')".format( + self.add_body(1, "{:<15} = lrn({}, k=1, n={}, alpha={}, beta={}, name='{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), IR_node.layer.attr['size'].i, IR_node.layer.attr['alpha'].f, IR_node.layer.attr['beta'].f, - IR_node.name)) + IR_node.name)) def _layer_LRN(self): @@ -339,11 +338,11 @@ def dense(input, name, **kwargs): """) - def _layer_Convolution(self): + def _layer_Conv(self): self.add_body(0, """ -def convolution(input, name, **kwargs): +def convolution(input, name, **kwargs): dim = __weights_dict[name]['weights'].ndim - + weight = np.transpose(__weights_dict[name]['weights'], [dim - 1, dim - 2] + list(range(0, dim - 2))) w = cntk.Parameter(init = weight, name = name + '_weight') @@ -388,7 +387,7 @@ def batch_normalization(input, name, epsilon, **kwargs): name = name + "_mean") var = cntk.Parameter(init = __weights_dict[name]['var'], name = name + "_var") - + layer = (input - mean) / cntk.sqrt(var + epsilon) if 'scale' in __weights_dict[name]: scale = cntk.Parameter(init = __weights_dict[name]['scale'], @@ -396,9 +395,9 @@ def batch_normalization(input, name, epsilon, **kwargs): layer = scale * layer if 'bias' in __weights_dict[name]: - bias = cntk.Parameter(init = __weights_dict[name]['bias'], + bias = cntk.Parameter(init = __weights_dict[name]['bias'], name = name + "_bias") layer = layer + bias - + return layer """) diff --git a/mmdnn/conversion/common/IR/IR_graph.py b/mmdnn/conversion/common/IR/IR_graph.py index bf36fb27..842803df 100644 --- a/mmdnn/conversion/common/IR/IR_graph.py +++ b/mmdnn/conversion/common/IR/IR_graph.py @@ -4,9 +4,11 @@ #---------------------------------------------------------------------------------------------- import mmdnn.conversion.common.IR.graph_pb2 as graph_pb2 -from mmdnn.conversion.common.IR.graph_pb2 import TensorShape +from mmdnn.conversion.common.utils import * +from mmdnn.conversion.common.IR.graph_pb2 import TensorShape, AttrValue from mmdnn.conversion.common.DataStructure.graph import Graph, GraphNode + def load_protobuf_from_file(container, filename): with open(filename, 'rb') as fin: file_content = fin.read() @@ -16,6 +18,7 @@ def load_protobuf_from_file(container, filename): container.ParseFromString(file_content) print("Parse file [%s] with binary format successfully." % (filename)) return container + except Exception as e: # pylint: disable=broad-except print ("Info: Trying to parse file [%s] with binary format but failed with error [%s]." 
% (filename, str(e))) @@ -49,36 +52,18 @@ def type(self): return self.layer.op def set_attrs(self, attrs): - for name, val in attrs.items(): - if isinstance(val, bool): - self.attr[name] = val - elif isinstance(val, int): - self.attr[name].i = val - elif isinstance(val, float): - self.attr[name].f = val - elif isinstance(val, str): - self.attr[name].s = val.encode('utf-8') - elif isinstance(val, TensorShape): - self.attr[name].shape.MergeFromString(val.SerializeToString()) - elif isinstance(val, list): - if len(val) == 0: return - - if isinstance(val[0], int): - self.attr[name].list.i.extend(val) - elif isinstance(val[0], TensorShape): - self.attr[name].list.shape.extend(val) - else: - raise NotImplementedError('AttrValue cannot be list of %s' % type(val[0])) - else: - raise NotImplementedError('AttrValue cannot be of %s' % type(val)) - + assign_IRnode_values(self, attrs) + def get_attr(self, name, default_value = None): if name in self.layer.attr: attr = self.layer.attr[name] field = attr.WhichOneof('value') val = getattr(attr, field) if field else default_value - return val.decode('utf-8') if isinstance(val, bytes) else val + if isinstance(val, AttrValue.ListValue): + return list(val.ListFields()[0][1]) + else: + return val.decode('utf-8') if isinstance(val, bytes) else val else: return default_value diff --git a/mmdnn/conversion/common/utils.py b/mmdnn/conversion/common/utils.py new file mode 100644 index 00000000..00a86b5a --- /dev/null +++ b/mmdnn/conversion/common/utils.py @@ -0,0 +1,116 @@ +#---------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +#---------------------------------------------------------------------------------------------- + +from __future__ import division +import math +import numpy as np + +__all__ = ["assign_IRnode_values", "convert_onnx_pad_to_tf", 'convert_tf_pad_to_onnx', 'compute_tf_same_padding', 'is_valid_padding'] + +def assign_attr_value(attr, val): + from mmdnn.conversion.common.IR.graph_pb2 import TensorShape + '''Assign value to AttrValue proto according to data type.''' + if isinstance(val, bool): + attr.b = val + elif isinstance(val, int): + attr.i = val + elif isinstance(val, float): + attr.f = val + elif isinstance(val, str): + attr.s = val.encode('utf-8') + elif isinstance(val, bytes): + attr.s = val + elif isinstance(val, TensorShape): + attr.shape.MergeFromString(val.SerializeToString()) + elif isinstance(val, list): + if not val: + return + if isinstance(val[0], int): + attr.list.i.extend(val) + elif isinstance(val[0], TensorShape): + attr.list.shape.extend(val) + else: + raise NotImplementedError('AttrValue cannot be of %s %s' % (type(val), type(val[0]))) + else: + raise NotImplementedError('AttrValue cannot be of %s' % type(val)) + + +def assign_IRnode_values(IR_node, val_dict): + for name, val in val_dict.items(): + assign_attr_value(IR_node.attr[name], val) + + +# For padding +def convert_tf_pad_to_onnx(pads): + pads = np.reshape(pads, -1).tolist() + dims = len(pads) + assert dims % 2 == 0 + ret = [] + for idx in range(0, dims, 2): + ret.append(pads[idx]) + for idx in range(1, dims, 2): + ret.append(pads[idx]) + return ret + + +def convert_onnx_pad_to_tf(pads): + return np.transpose(np.array(pads).reshape([2, -1])).reshape(-1, 2).tolist() + + +def is_valid_padding(pads): + return sum(np.reshape(pads, -1)) == 0 + + +def 
compute_tf_same_padding(input_shape, kernel_shape, strides, data_format='NHWC'): + """ Convert [SAME] padding in tensorflow, keras to onnx pads, + i.e. [x1_begin, x2_begin...x1_end, x2_end,...] """ + # print (input_shape) + # print (kernel_shape) + # print (strides) + if data_format.startswith('NC'): + # Not tested + input_shape = input_shape[2:] + remove_dim = len(strides) - len(input_shape) + if remove_dim > 0: + strides = strides[remove_dim::] + + else: + input_shape = input_shape[1:-1] + remove_dim = len(input_shape) - len(strides) + 1 + if remove_dim < 0: + strides = strides[1:remove_dim] + + # print (input_shape) + # print (kernel_shape) + # print (strides) + + up_list = [0] + down_list = [0] + + for idx in range(0, len(input_shape)): + # kernel_shape[idx] = (kernel_shape[idx] - 1) * dilation_rate + 1 + output_shape = (input_shape[idx] + strides[idx] - 1) // strides[idx] + this_padding = (output_shape - 1) * strides[idx] + kernel_shape[idx] - input_shape[idx] + this_padding = max(0, this_padding) + up_list.append(this_padding // 2) + down_list.append(this_padding - this_padding // 2) + + # print ([0] + up_list + [0] + down_list if data_format.startswith('NC') else up_list + [0] + down_list + [0]) + # print ('-----------------------------------------------------') + return [0] + up_list + [0] + down_list if data_format.startswith('NC') else up_list + [0] + down_list + [0] +""" +int64 effective_filter_size = (filter_size - 1) * dilation_rate + 1; + switch (padding_type) { + case Padding::SAME: + *output_size = (input_size + stride - 1) / stride; + const int64 padding_needed = + std::max(0LL, (*output_size - 1) * stride + effective_filter_size - + input_size); + // For odd values of total padding, add more padding at the 'right' + // side of the given dimension. 
+ *padding_before = padding_needed / 2; + *padding_after = padding_needed - *padding_before; + break; +""" \ No newline at end of file diff --git a/mmdnn/conversion/examples/imagenet_test.py b/mmdnn/conversion/examples/imagenet_test.py index 324b6c6e..4559e711 100644 --- a/mmdnn/conversion/examples/imagenet_test.py +++ b/mmdnn/conversion/examples/imagenet_test.py @@ -4,7 +4,6 @@ #---------------------------------------------------------------------------------------------- from __future__ import absolute_import - import argparse import numpy as np import sys @@ -162,6 +161,7 @@ def print_intermediate_result(self, intermediate_output, if_transpose = False): print (intermediate_output) print (intermediate_output.shape) + print ("%.30f" % np.sum(intermediate_output)) def test_truth(self): diff --git a/mmdnn/conversion/examples/keras/extract_model.py b/mmdnn/conversion/examples/keras/extract_model.py index 2a32b046..56c137f7 100644 --- a/mmdnn/conversion/examples/keras/extract_model.py +++ b/mmdnn/conversion/examples/keras/extract_model.py @@ -67,5 +67,14 @@ def _main(): result = [(i, predict[i]) for i in top_indices] print (result) + # layer_name = 'block2_pool' + # intermediate_layer_model = keras.Model(inputs=model.input, + # outputs=model.get_layer(layer_name).output) + # intermediate_output = intermediate_layer_model.predict(img) + # print (intermediate_output) + # print (intermediate_output.shape) + # print ("%.30f" % np.sum(intermediate_output)) + + if __name__=='__main__': _main() \ No newline at end of file diff --git a/mmdnn/conversion/examples/keras/imagenet_test.py b/mmdnn/conversion/examples/keras/imagenet_test.py index 33b0bc8d..2fbad4a4 100644 --- a/mmdnn/conversion/examples/keras/imagenet_test.py +++ b/mmdnn/conversion/examples/keras/imagenet_test.py @@ -38,7 +38,7 @@ def print_intermediate_result(self, layer_name, if_transpose = False): def inference(self, image_path): self.preprocess(image_path) - # self.print_intermediate_result('pool1_norm1', True) + # self.print_intermediate_result('block2_pool', False) self.print_result() diff --git a/mmdnn/conversion/examples/pytorch/imagenet_test.py b/mmdnn/conversion/examples/pytorch/imagenet_test.py index b5513394..885b280d 100644 --- a/mmdnn/conversion/examples/pytorch/imagenet_test.py +++ b/mmdnn/conversion/examples/pytorch/imagenet_test.py @@ -15,6 +15,10 @@ class TestTorch(TestKit): def __init__(self): super(TestTorch, self).__init__() + + self.truth['tensorflow']['inception_v3'] = [(22, 9.6691055), (24, 4.3524747), (25, 3.5957973), (132, 3.5657473), (23, 3.346283)] + self.truth['keras']['inception_v3'] = [(21, 0.93430489), (23, 0.002883445), (131, 0.0014781791), (24, 0.0014518998), (22, 0.0014435351)] + self.model = self.MainModel.KitModel(self.args.w) self.model.eval() @@ -28,27 +32,25 @@ def preprocess(self, image_path): def print_result(self): predict = self.model(self.data) - predict = predict.data.numpy() + predict = predict.data.numpy() super(TestTorch, self).print_result(predict) - def print_intermediate_result(self, layer_name, if_transpose = False): - testop = self.testop - intermediate_output = testop(self.data).data.numpy() - super(TestTorch, self).predict(intermediate_output, if_transpose) + def print_intermediate_result(self, layer_name, if_transpose=False): + intermediate_output = self.model.test.data.numpy() + super(TestTorch, self).print_intermediate_result(intermediate_output, if_transpose) def inference(self, image_path): self.preprocess(image_path) - # self.print_intermediate_result('conv1_7x7_s2_1', False) - 
self.print_result() - self.test_truth() + # self.print_intermediate_result(None, False) + self.test_truth() if __name__=='__main__': tester = TestTorch() - tester.inference(tester.args.image) \ No newline at end of file + tester.inference(tester.args.image) diff --git a/mmdnn/conversion/keras/README.md b/mmdnn/conversion/keras/README.md index 6a870356..0ee3541f 100644 --- a/mmdnn/conversion/keras/README.md +++ b/mmdnn/conversion/keras/README.md @@ -81,6 +81,14 @@ Keras model file is saved as [keras_inception_v3.h5], generated by [keras_incept - Add - Concat +- Relu +- Softmax +- Tanh +- Sigmoid +- Softplus +- Softsign +- HardSigmoid +- Elu ## Develop version diff --git a/mmdnn/conversion/keras/keras2_emitter.py b/mmdnn/conversion/keras/keras2_emitter.py index f00ebc0b..36ad6fa7 100644 --- a/mmdnn/conversion/keras/keras2_emitter.py +++ b/mmdnn/conversion/keras/keras2_emitter.py @@ -9,6 +9,7 @@ import mmdnn.conversion.common.IR.graph_pb2 as graph_pb2 from mmdnn.conversion.common.IR.graph_pb2 import NodeDef, GraphDef, DataType from mmdnn.conversion.common.DataStructure.emitter import Emitter +from mmdnn.conversion.common.utils import * class Keras2Emitter(Emitter): @@ -47,19 +48,19 @@ def header_code(self): def load_weights(model, weight_file): import numpy as np - + if weight_file == None: return - + try: weights_dict = np.load(weight_file).item() except: - weights_dict = np.load(weight_file, encoding='bytes').item() + weights_dict = np.load(weight_file, encoding='bytes').item() for layer in model.layers: if layer.name in weights_dict: cur_dict = weights_dict[layer.name] - current_layer_parameters = list() + current_layer_parameters = list() if layer.__class__.__name__ == "BatchNormalization": if 'scale' in cur_dict: current_layer_parameters.append(cur_dict['scale']) @@ -121,9 +122,9 @@ def _emit_activation(self, IR_node, op): op, self.parent_variable_name(IR_node))) - + def _emit_merge(self, IR_node, func): - inputs = ', '.join('%s' % self.IR_graph.get_node(i).real_variable_name for i in IR_node.in_edges) + inputs = ', '.join('%s' % self.IR_graph.get_node(i).real_variable_name for i in IR_node.in_edges) axis = ' axis = {},'.format(IR_node.get_attr('axis')) if 'axis' in IR_node.layer.attr else "" self.add_body(1, "{:<15} = layers.{}(name = '{}',{} inputs = [{}])".format( IR_node.variable_name, @@ -131,17 +132,52 @@ def _emit_merge(self, IR_node, func): IR_node.name, axis, inputs)) - + + + @staticmethod + def _convert_padding(padding): + padding = convert_onnx_pad_to_tf(padding)[1:-1] + for idx, pad in enumerate(padding): + padding[idx] = tuple(pad) + padding = tuple(padding) + return padding + + + def _defuse_padding(self, IR_node): + auto_pad = IR_node.get_attr('auto_pad') + if auto_pad: + input_node = self.parent_variable_name(IR_node) + if auto_pad == 'VALID': + padding = 'valid' + elif auto_pad.startswith("SAME"): + padding = 'same' + else: + assert False + return input_node, padding + + else: + padding = IR_node.get_attr("pads") + padding = self._convert_padding(padding) + if is_valid_padding(padding) == False: + input_node = IR_node.variable_name + '_input' + self.add_body(1, "{:<15} = layers.ZeroPadding{}D(padding = {})({})".format( + input_node, + len(padding), + padding, + self.parent_variable_name(IR_node))) + else: + input_node = self.parent_variable_name(IR_node) + + return input_node, 'valid' + def _emit_convolution(self, IR_node, conv_type): - filters = IR_node.IR_layer.attr["filter"].list.i[-1] + filters = IR_node.get_attr('kernel_shape')[-1] filters_str = 'filters = 
{}'.format(filters) if conv_type.startswith('layer') else 'depth_multiplier = {}'.format(filters) - kernel_size = ', '.join('%s' % i for i in IR_node.layer.attr['filter'].list.i[:-2]) - strides = ','.join('%s' % i for i in IR_node.IR_layer.attr["strides"].list.i[1:-1]) - use_bias = IR_node.IR_layer.attr["use_bias"].b - padding = IR_node.IR_layer.attr["padding"].s.decode('utf-8') - padding = padding.lower() + kernel_size = ', '.join('%s' % i for i in IR_node.get_attr('kernel_shape')[:-2]) + strides = ', '.join('%s' % i for i in IR_node.IR_layer.attr["strides"].list.i[1:-1]) + input_node, padding = self._defuse_padding(IR_node) self.add_body(1, "{:<15} = {}(name = '{}', {}, kernel_size = ({}), strides = ({}), padding = '{}', use_bias = {})({})".format( IR_node.variable_name, conv_type, @@ -150,64 +186,25 @@ def _emit_convolution(self, IR_node, conv_type): kernel_size, strides, padding, - use_bias, - self.parent_variable_name(IR_node))) + IR_node.get_attr('use_bias'), + input_node)) - def emit_Convolution(self, IR_node): - dim = len(IR_node.IR_layer.attr["strides"].list.i) - 2 + def emit_Conv(self, IR_node): + dim = len(IR_node.get_attr('kernel_shape')) - 2 return self._emit_convolution(IR_node, 'layers.Conv{}D'.format(dim)) - def emit_Pool(self, IR_node): - dim = len(IR_node.IR_layer.attr["strides"].list.i) - 2 - - if IR_node.layer.attr['pooling_type'].s == b"MAX": - pool_name = "MaxPooling{}D".format(dim) - elif IR_node.layer.attr['pooling_type'].s == b"AVG": - pool_name = "AveragePooling{}D".format(dim) - else: - assert False - - if IR_node.layer.attr['global_pooling'].b: - self.add_body(1, "{:<15} = layers.Global{}(name = '{}')({})".format( - IR_node.variable_name, - pool_name, - IR_node.name, - self.parent_variable_name(IR_node))) - - else: - for e in IR_node.IR_layer.attr["dilation_rate"].list.i: - assert e == 1 - - padding = IR_node.IR_layer.attr["padding"].s.decode('utf-8') - padding = padding.lower() - - pool_size = IR_node.IR_layer.attr['window_shape'].list.i[1:-1] - pool_size = ', '.join('%s' % i for i in pool_size) - strides = IR_node.IR_layer.attr['strides'].list.i[1:-1] - strides = ', '.join('%s' % i for i in strides) - - self.add_body(1, "{:<15} = layers.{}(name = '{}', pool_size = ({}), strides = ({}), padding = '{}')({})".format( - IR_node.variable_name, - pool_name, - IR_node.name, - pool_size, - strides, - padding, - self.parent_variable_name(IR_node))) - - ############# # Operators # ############# def emit_UNKNOWN(self, IR_node): print (IR_node.name) - + def emit_Add(self, IR_node): - self._emit_merge(IR_node, "add") + self._emit_merge(IR_node, "add") def emit_DataInput(self, IR_node): @@ -231,7 +228,7 @@ def emit_Dropout(self, IR_node): IR_node.IR_layer.attr["keep_prob"].f, seed, self.parent_variable_name(IR_node))) - + def emit_FullyConnected(self, IR_node): self.add_body(1, "{:<15} = layers.Dense(name = '{}', units = {}, use_bias = {})({})".format( @@ -245,10 +242,51 @@ def emit_FullyConnected(self, IR_node): def emit_Flatten(self, IR_node): self.used_layers.add('Flatten') self.add_body(1, "{:<15} = __flatten(name = '{}', input = {})".format( - IR_node.variable_name, + IR_node.variable_name, IR_node.name, self.parent_variable_name(IR_node))) - + + + def emit_Pool(self, IR_node): + dim = len(IR_node.get_attr("strides")) - 2 + + pooling_type = IR_node.get_attr('pooling_type') + if pooling_type == "MAX": + pool_name = "MaxPooling{}D".format(dim) + elif pooling_type == "AVG": + pool_name = "AveragePooling{}D".format(dim) + else: + assert False + + if 
IR_node.layer.attr['global_pooling'].b: + self.add_body(1, "{:<15} = layers.Global{}(name = '{}')({})".format( + IR_node.variable_name, + pool_name, + IR_node.name, + self.parent_variable_name(IR_node))) + + else: + dilations = IR_node.get_attr('dilations') + if dilations: + for e in IR_node.get_attr('dilations'): + assert e == 1 + + pool_size = IR_node.get_attr('kernel_shape')[1:-1] + pool_size = ', '.join('%s' % i for i in pool_size) + strides = IR_node.get_attr('strides')[1:-1] + strides = ', '.join('%s' % i for i in strides) + + input_node, padding = self._defuse_padding(IR_node) + + self.add_body(1, "{:<15} = layers.{}(name = '{}', pool_size = ({}), strides = ({}), padding = '{}')({})".format( + IR_node.variable_name, + pool_name, + IR_node.name, + pool_size, + strides, + padding, + input_node)) + def emit_Reshape(self, IR_node): shape_str = self.shapeToStr(IR_node.IR_layer.attr["shape"].list.i) @@ -269,15 +307,15 @@ def emit_Relu(self, IR_node): def emit_Softmax(self, IR_node): self._emit_activation(IR_node, 'softmax') - + def emit_Sigmoid(self, IR_node): self._emit_activation(IR_node, 'sigmoid') - + def emit_Embedding(self, IR_node): self.add_body(1, "{:<15} = layers.Embedding(input_dim = {}, output_dim = {}, mask_zero = {})({})".format( - IR_node.variable_name, + IR_node.variable_name, IR_node.get_attr('input_dim'), IR_node.IR_layer.attr['output_dim'].i, IR_node.IR_layer.attr['mask_zero'].b, @@ -292,9 +330,9 @@ def emit_RNNs(self, IR_node, func): IR_node.IR_layer.attr['recurrent_dropout'].f) else: dropout_str = "" - + code = "{:<15} = layers.{}(units = {}, use_bias = {} {})({})".format( - IR_node.name, + IR_node.name, func, IR_node.IR_layer.attr['units'].i, IR_node.IR_layer.attr['use_bias'].b, @@ -312,85 +350,79 @@ def emit_GRU(self, IR_node): return self.emit_RNNs(IR_node, "GRU") - def emit_Concat(self, IR_node): - self._emit_merge(IR_node, "concatenate") + def emit_Concat(self, IR_node): + self._emit_merge(IR_node, "concatenate") def emit_BatchNorm(self, IR_node): - axis = IR_node.layer.attr['axis'].i if 'axis' in IR_node.layer.attr else -1 + axis = IR_node.layer.attr['axis'].i if 'axis' in IR_node.layer.attr else -1 self.add_body(1, "{:<15} = layers.BatchNormalization(name = '{}', axis = {}, epsilon = {}, center = {}, scale = {})({})".format( IR_node.variable_name, - IR_node.name, + IR_node.name, axis, IR_node.layer.attr['epsilon'].f, IR_node.layer.attr['bias'].b, IR_node.layer.attr['scale'].b, self.parent_variable_name(IR_node))) - - + + def emit_Pad(self, IR_node): - if 'mode' not in IR_node.layer.attr or IR_node.IR_layer.attr['mode'].s == b"CONSTANT": + mode = IR_node.get_attr('mode', 'constant') + if mode == "constant": func = "ZeroPadding" else: - print (IR_node.IR_layer.attr['mode'].s) - assert False - - dim = len(IR_node.IR_layer.attr['paddings'].list.i) // 2 - 2 + print (mode) + raise NotImplementedError() - padding_str = str() - for idx in range(1, dim + 1): - padding_str += "({}, {}),".format( - IR_node.IR_layer.attr['paddings'].list.i[idx + idx], - IR_node.IR_layer.attr['paddings'].list.i[idx + idx + 1]) + dim = len(IR_node.get_attr('pads')) // 2 - 2 - self.add_body(1, "{:<15} = layers.{}{}D(name = '{}', padding = ({}))({})".format( + padding = self._convert_padding(IR_node.get_attr('pads')) + self.add_body(1, "{:<15} = layers.{}{}D(name='{}', padding={})({})".format( IR_node.variable_name, func, dim, IR_node.name, - padding_str, - self.IR_graph.get_node(IR_node.in_edges[0]).real_variable_name)) + padding, + self.parent_variable_name(IR_node))) + - def 
emit_Squeeze(self, IR_node): self.emit_Flatten(IR_node) def emit_ReduceMean(self, IR_node): - axes = ', '.join('%s' % i for i in IR_node.layer.attr['axes'].list.i) - self.add_body(1,"{:<15} = layers.Lambda(lambda x: K.mean(x, axis=[{}], keepdims = {}))({})".format( + axes = ', '.join('%s' % i for i in IR_node.get_attr('axes')) + self.add_body(1,"{:<15} = layers.Lambda(lambda x: K.mean(x, axis=[{}], keepdims={}))({})".format( IR_node.variable_name, axes, - IR_node.layer.attr['keepdims'].b, + IR_node.get_attr('keepdims'), self.parent_variable_name(IR_node))) def emit_LRN(self, IR_node): self.used_layers.add(IR_node.type) - code = "{:<15} = LRN(size = {}, alpha = {}, beta = {}, k = {}, name = '{}')({})".format( + self.add_body(1, "{:<15} = LRN(size = {}, alpha = {}, beta = {}, k = {}, name = '{}')({})".format( IR_node.variable_name, - IR_node.layer.attr['size'].i, - IR_node.layer.attr['alpha'].f, - IR_node.layer.attr['beta'].f, - IR_node.layer.attr['k'].f, + IR_node.get_attr('size'), + IR_node.get_attr('alpha'), + IR_node.get_attr('beta'), + IR_node.get_attr('k'), IR_node.name, - self.IR_graph.get_parent(IR_node.name, [0]).variable_name) - - return code + self.parent_variable_name(IR_node))) def emit_SeparableConv(self, IR_node): - assert len(IR_node.layer.attr["strides"].list.i) == 4 + assert len(IR_node.get_attr("strides")) == 4 return self._emit_convolution(IR_node, "layers.SeparableConv2D") - def emit_Relu6(self, IR_node): + def emit_Relu6(self, IR_node): self.add_body(1, "{:<15} = layers.Activation(keras.applications.mobilenet.relu6, name = '{}')({})".format( IR_node.variable_name, IR_node.name, self.IR_graph.get_node(IR_node.in_edges[0]).real_variable_name)) - + def emit_DepthwiseConv(self, IR_node): return self._emit_convolution(IR_node, 'keras.applications.mobilenet.DepthwiseConv2D') @@ -406,7 +438,7 @@ def _layer_LRN(self): self.add_body(0, ''' from keras.layers.core import Layer class LRN(Layer): - + def __init__(self, size=5, alpha=0.0005, beta=0.75, k=2, **kwargs): self.n = size self.alpha = alpha diff --git a/mmdnn/conversion/keras/keras2_parser.py b/mmdnn/conversion/keras/keras2_parser.py index d195bfe3..ff2ec039 100644 --- a/mmdnn/conversion/keras/keras2_parser.py +++ b/mmdnn/conversion/keras/keras2_parser.py @@ -10,10 +10,11 @@ import mmdnn.conversion.common.IR.graph_pb2 as graph_pb2 from mmdnn.conversion.common.IR.graph_pb2 import NodeDef, GraphDef, DataType from mmdnn.conversion.common.DataStructure.parser import Parser +from mmdnn.conversion.common.utils import * class Keras2Parser(Parser): - + dtype_map = { "float16" : graph_pb2.DT_FLOAT16, "float32" : graph_pb2.DT_FLOAT32, @@ -26,12 +27,15 @@ class Keras2Parser(Parser): } activation_map = { - "relu" : "Relu", - 'softmax' : "Softmax", - 'sigmoid' : "Sigmoid", - "tanh" : "Tanh", - "elu" : "Elu", - "relu6" : "Relu6" + "relu" : "Relu", + 'softmax' : "Softmax", + 'sigmoid' : "Sigmoid", + "tanh" : "Tanh", + "elu" : "Elu", + "relu6" : "Relu6", + 'softplus' : 'Softplus', + 'softsign' : 'Softsign', + 'hard_sigmoid' : 'HardSigmoid' } @@ -115,13 +119,10 @@ def _set_output_shape(source_node, IR_node): shape = graph_pb2.TensorShape() for dim in source_node.layer.output_shape: new_dim = shape.dim.add() - if dim == None: - new_dim.size = -1 - else: - new_dim.size = dim + new_dim.size = dim if dim else -1 IR_node.attr["_output_shapes"].list.shape.extend([shape]) - + @staticmethod def _copy_and_reop(source_node, IR_node, new_op = None): @@ -130,7 +131,7 @@ def _copy_and_reop(source_node, IR_node, new_op = None): if 
hasattr(source_node.layer, "dtype"): IR_node.attr["dtype"].type = Keras2Parser.dtype_map[source_node.layer.dtype] - + Keras2Parser._set_output_shape(source_node, IR_node) @@ -140,7 +141,7 @@ def _copy_shape(source_node, target_node): for dim in source_node.output_shape: new_dim = target_node.attr["shape"].shape.dim.add() new_dim.size = -1 if dim == None else dim - + else: target_node.attr["shape"].shape.unknown_rank = True @@ -156,31 +157,48 @@ def _convert_dataformat(source_node, target_node): @staticmethod - def _convert_padding(source_node, target_node): - target_node.attr["padding"].s = source_node.keras_layer.padding.upper().encode('utf-8') + def _convert_padding(source_node, IR_node): + # TODO: Fused conv and pool with padding is different from defused operators + dims = len(source_node.layer.input_shape) + if source_node.layer.padding == 'valid': + assign_IRnode_values(IR_node, {'auto_pad' : "VALID", 'pads' : [0, 0] * dims}) + + elif source_node.layer.padding == 'same': + kernel_shape = source_node.layer.kernel_size if hasattr(source_node.layer, 'kernel_size') else source_node.layer.pool_size + padding = compute_tf_same_padding( + source_node.layer.input_shape, + kernel_shape, + list(source_node.layer.strides)) + assign_IRnode_values(IR_node, {'auto_pad' : "SAME_LOWER", 'pads' : padding}) + else: + assert False - def _defuse_activation(self, keras_node): - if keras_node.keras_layer.activation is None or keras_node.keras_layer.activation.__name__ == "linear": + + def _defuse_activation(self, source_node): + if source_node.layer.activation is None or source_node.layer.activation.__name__ == "linear": return IR_node = self.IR_graph.node.add() - IR_node.name = keras_node.name + "_activation" - IR_node.op = Keras2Parser.activation_map[keras_node.layer.activation.__name__] - IR_node.input.append(keras_node.name) - Keras2Parser._set_output_shape(keras_node, IR_node) - - # Kit TODO: More activation functions + IR_node.name = source_node.real_name + "_activation" + IR_node.op = Keras2Parser.activation_map[source_node.layer.activation.__name__] + IR_node.input.append(source_node.real_name) + Keras2Parser._set_output_shape(source_node, IR_node) + + # TODO: More activation functions # for ELU - if hasattr(keras_node.layer, 'alpha'): - IR_node.attr['alpha'].f = keras_node.layer.alpha + if hasattr(source_node.layer, 'alpha'): + assign_attr_value(IR_node['alpha'], source_node.layer.alpha) + + source_node.real_name = IR_node.name - self.src_graph.get_node(keras_node.name).real_name = IR_node.name - def _convert_convolution(self, source_node, dim): IR_node = self.IR_graph.node.add() + # input edge + self.convert_inedge(source_node, IR_node) + # name, op if source_node.type.startswith('Separable'): Keras2Parser._copy_and_reop(source_node, IR_node, "SeparableConv") @@ -190,63 +208,59 @@ def _convert_convolution(self, source_node, dim): else: if source_node.type.startswith('Conv'): - Keras2Parser._copy_and_reop(source_node, IR_node, "Convolution") + Keras2Parser._copy_and_reop(source_node, IR_node, "Conv") elif source_node.type.startswith('Deconv'): - Keras2Parser._copy_and_reop(source_node, IR_node, "Deconvolution") + Keras2Parser._copy_and_reop(source_node, IR_node, "ConvTranspose") elif source_node.type.startswith('Depthwise'): - Keras2Parser._copy_and_reop(source_node, IR_node, "DepthwiseConv") + Keras2Parser._copy_and_reop(source_node, IR_node, "DepthwiseConv") else: raise NotImplementedError("Convolution layer [{}] is not supported.".format(source_node.type)) # weights - if self.weight_loaded == 
True: + if self.weight_loaded: self.set_weight(source_node.name, "weights", source_node.layer.get_weights()[0]) - if source_node.layer.use_bias == True: + if source_node.layer.use_bias: self.set_weight(source_node.name, "bias", source_node.layer.get_weights()[1]) - # input edge - self.convert_inedge(source_node, IR_node) - - # padding + if isinstance(source_node.layer.kernel_size, int): + source_node.layer.kernel_size = (source_node.layer.kernel_size) * dim + + if isinstance(source_node.layer.strides, int): + source_node.layer.strides = (source_node.layer.strides) * dim + + if isinstance(source_node.layer.dilation_rate, int): + source_node.layer.dilation_rate = (source_node.layer.dilation_rate) * dim + + kwargs = dict() + + # pads Keras2Parser._convert_padding(source_node, IR_node) - + # filter # [kd, kh, kw, channel_size, filter number] - if isinstance(source_node.layer.kernel_size, int): - IR_node.attr["filter"].list.i.extend([source_node.layer.kernel_size] * dim) - else: - IR_node.attr["filter"].list.i.extend(source_node.layer.kernel_size) - in_channel = source_node.layer.input_shape[-1] if self.data_format == "channels_last" else source_node.layer.input_shape[1] out_channel = source_node.layer.filters or source_node.layer.depth_multiplier - + if source_node.type.startswith("Deconv"): - IR_node.attr["filter"].list.i.extend([out_channel, in_channel]) + kwargs['kernel_shape'] = list(source_node.layer.kernel_size) + [out_channel, in_channel] else: - IR_node.attr["filter"].list.i.extend([in_channel, out_channel]) - + kwargs['kernel_shape'] = list(source_node.layer.kernel_size) + [in_channel, out_channel] + # use_bias - IR_node.attr["use_bias"].b = source_node.keras_layer.use_bias + kwargs['use_bias'] = source_node.keras_layer.use_bias # strides # [1, sd, sh, sw, 1] - IR_node.attr["strides"].list.i.append(1) - if isinstance(source_node.layer.kernel_size, int): - IR_node.attr["strides"].list.i.extend([source_node.layer.strides] * dim) - else: - IR_node.attr["strides"].list.i.extend(source_node.layer.strides) - IR_node.attr['strides'].list.i.append(1) - + kwargs['strides'] = [1] + list(source_node.layer.strides) + [1] + # dilations - IR_node.attr['dilation_rate'].list.i.append(1) - if isinstance(source_node.layer.dilation_rate, int): - IR_node.attr["dilation_rate"].list.i.extend([source_node.layer.dilation_rate] * dim) - else: - IR_node.attr["dilation_rate"].list.i.extend(source_node.layer.dilation_rate) - IR_node.attr['dilation_rate'].list.i.append(1) + # [1, dd, dh, dw, 1] + kwargs['dilations'] = [1] + list(source_node.layer.dilation_rate) + [1] + + assign_IRnode_values(IR_node, kwargs) # activation self._defuse_activation(source_node) @@ -261,38 +275,46 @@ def _convert_pooling(self, source_node, dim, pooling_type, is_global): # input edge self.convert_inedge(source_node, IR_node) - IR_node.attr['pooling_type'].s = pooling_type.encode('utf-8') - + kwargs = {} + + kwargs['pooling_type'] = pooling_type + if is_global: - IR_node.attr['global_pooling'].b = True - IR_node.attr["strides"].list.i[:] = [1] * (dim + 2) # for saving dim + kwargs['global_pooling'] = True + kwargs['strides'] = [1] * (dim + 2) else: + if isinstance(source_node.layer.pool_size, int): + source_node.layer.pool_size = (source_node.layer.pool_size) * dim + + if isinstance(source_node.layer.strides, int): + source_node.layer.strides = (source_node.layer.strides) * dim + # padding - Keras2Parser._convert_padding(source_node, IR_node) + self._convert_padding(source_node, IR_node) # strides # [1, sd, sh, sw, 1] - 
IR_node.attr["strides"].list.i.append(1) - if isinstance(source_node.layer.strides, int): - IR_node.attr["strides"].list.i.extend([source_node.layer.strides] * dim) - else: - IR_node.attr["strides"].list.i.extend(source_node.layer.strides) - IR_node.attr['strides'].list.i.append(1) + kwargs['strides'] = [1] + list(source_node.layer.strides) + [1] # window_shape # [1, pd, ph, pw, 1] - IR_node.attr["window_shape"].list.i.append(1) - if isinstance(source_node.layer.pool_size, int): - IR_node.attr["window_shape"].list.i.extend([source_node.layer.pool_size] * dim) - else: - IR_node.attr["window_shape"].list.i.extend(source_node.layer.pool_size) - IR_node.attr["window_shape"].list.i.append(1) - - + kwargs['kernel_shape'] = [1] + list(source_node.layer.pool_size) + [1] + + assign_IRnode_values(IR_node, kwargs) + + if is_global: + flatten_node = self.IR_graph.node.add() + flatten_node.name = source_node.name + '_flatten' + flatten_node.op = 'Flatten' + flatten_node.input.append(source_node.name) + Keras2Parser._set_output_shape(source_node, flatten_node) + source_node.real_name = flatten_node.name + + def _convert_merge(self, source_node, new_name = None): IR_node = self.IR_graph.node.add() - # name, op + # name, op Keras2Parser._copy_and_reop(source_node, IR_node, new_name) # input edge @@ -303,52 +325,52 @@ def _convert_merge(self, source_node, new_name = None): IR_node.attr['axis'].i = source_node.layer.axis return IR_node - - def _convert_padding_api(self, keras_node, IR_node, mode): + + def _convert_padding_api(self, source_node, IR_node, mode): # name, op - Keras2Parser._copy_and_reop(keras_node, IR_node, "Pad") + Keras2Parser._copy_and_reop(source_node, IR_node, "Pad") # input edge - self.convert_inedge(keras_node, IR_node) - - IR_node.attr['mode'].s = mode + self.convert_inedge(source_node, IR_node) + + kwargs = dict() + kwargs['mode'] = mode # padding - IR_node.attr["paddings"].list.i.extend([0, 0]) - for e in keras_node.keras_layer.padding: - for j in e: - IR_node.attr["paddings"].list.i.append(j) - IR_node.attr["paddings"].list.i.extend([0, 0]) + kwargs['pads'] = [0, 0] + for padding_pair in source_node.layer.padding: + kwargs['pads'].extend(padding_pair) + kwargs['pads'] += [0, 0] + kwargs['pads'] = convert_tf_pad_to_onnx(kwargs['pads']) + IR_node.set_attrs(kwargs) + print (IR_node) + assert False def rename_UNKNOWN(self, source_node): # only for training IR_node = self.IR_graph.node.add() - + # name, op Keras2Parser._copy_and_reop(source_node, IR_node) - + # input edge self.convert_inedge(source_node, IR_node) - - # Merge Layers - def rename_Add(self, source_node): - self._convert_merge(source_node) - - def rename_InputLayer(self, source_node): - # only for training + def rename_Activation(self, keras_node): IR_node = self.IR_graph.node.add() - + # name, op - Keras2Parser._copy_and_reop(source_node, IR_node, "DataInput") - + Keras2Parser._copy_and_reop(keras_node, IR_node, self.activation_map[keras_node.keras_layer.activation.__name__]) + # input edge - self.convert_inedge(source_node, IR_node) + self.convert_inedge(keras_node, IR_node) - # shape - Keras2Parser._copy_shape(source_node.keras_layer, IR_node) + + # Merge Layers + def rename_Add(self, source_node): + self._convert_merge(source_node) def rename_Conv1D(self, source_node): @@ -361,8 +383,23 @@ def rename_Conv2D(self, source_node): def rename_Conv3D(self, source_node): self._convert_convolution(source_node, 3) - - + + + def rename_InputLayer(self, source_node): + # only for training + IR_node = self.IR_graph.node.add() + + # 
name, op + Keras2Parser._copy_and_reop(source_node, IR_node, "DataInput") + + # input edge + self.convert_inedge(source_node, IR_node) + + # shape + Keras2Parser._copy_shape(source_node.keras_layer, IR_node) + + + def rename_GlobalMaxPooling1D(self, source_node): self._convert_pooling(source_node, 1, "MAX", True) @@ -373,7 +410,7 @@ def rename_GlobalMaxPooling2D(self, source_node): def rename_GlobalMaxPooling3D(self, source_node): self._convert_pooling(source_node, 3, "MAX", True) - + def rename_GlobalAveragePooling1D(self, source_node): self._convert_pooling(source_node, 1, "AVG", True) @@ -406,10 +443,10 @@ def rename_AveragePooling1D(self, source_node): def rename_AveragePooling2D(self, source_node): self._convert_pooling(source_node, 2, "AVG", False) - + def rename_AveragePooling3D(self, source_node): self._convert_pooling(source_node, 3, "AVG", False) - + def rename_Dropout(self, source_node): # only for training @@ -424,7 +461,7 @@ def rename_Dropout(self, source_node): IR_node.attr["keep_prob"].f = source_node.keras_layer.rate if source_node.keras_layer.seed != None: IR_node.attr["seed"].i = source_node.keras_layer.seed - + # Core Layers def rename_Dense(self, source_node): @@ -432,7 +469,7 @@ def rename_Dense(self, source_node): # name, op Keras2Parser._copy_and_reop(source_node, IR_node, "FullyConnected") - + # input edge self.convert_inedge(source_node, IR_node) @@ -442,12 +479,12 @@ def rename_Dense(self, source_node): # use_bias IR_node.attr["use_bias"].b = source_node.keras_layer.use_bias - # weights - if self.weight_loaded == True: + # weights + if self.weight_loaded == True: self.set_weight(source_node.name, 'weights', source_node.layer.get_weights()[0]) if IR_node.attr["use_bias"].b == True: self.set_weight(source_node.name, 'bias', source_node.layer.get_weights()[1]) - + # activation self._defuse_activation(source_node) @@ -462,22 +499,12 @@ def rename_Flatten(self, source_node): self.convert_inedge(source_node, IR_node) - def rename_Activation(self, keras_node): - IR_node = self.IR_graph.node.add() - - # name, op - Keras2Parser._copy_and_reop(keras_node, IR_node, self.activation_map[keras_node.keras_layer.activation.__name__]) - - # input edge - self.convert_inedge(keras_node, IR_node) - - def rename_Embedding(self, source_node): IR_node = self.IR_graph.node.add() # name, op Keras2Parser._copy_and_reop(source_node, IR_node) - + # input edge self.convert_inedge(source_node, IR_node) @@ -499,7 +526,7 @@ def rename_LSTM(self, keras_node): # name, op Keras2Parser._copy_and_reop(keras_node, IR_node) - + # input edge self.convert_inedge(keras_node, IR_node) @@ -522,7 +549,7 @@ def rename_GRU(self, source_node): # name, op Keras2Parser._copy_and_reop(source_node, IR_node) - + # input edge self.convert_inedge(source_node, IR_node) @@ -531,7 +558,7 @@ def rename_GRU(self, source_node): # activation self._defuse_activation(source_node) - + def rename_Multiply(self, source_node): self._convert_merge(source_node, 'Mul') @@ -547,7 +574,7 @@ def rename_Maximum(self, source_node): def rename_Concatenate(self, source_node): - IR_node = self._convert_merge(source_node, 'Concat') + IR_node = self._convert_merge(source_node, 'Concat') def rename_Reshape(self, source_node): @@ -555,11 +582,11 @@ def rename_Reshape(self, source_node): # name, op Keras2Parser._copy_and_reop(source_node, IR_node, 'Reshape') - + # input edge self.convert_inedge(source_node, IR_node) - # for target shape + # for target shape IR_node.attr["shape"].list.i.append(-1) 
IR_node.attr["shape"].list.i.extend(source_node.layer.target_shape) @@ -569,7 +596,7 @@ def rename_Lambda(self, source_node): # name, op Keras2Parser._copy_and_reop(source_node, IR_node, "Keras Lambda") - + # input edge self.convert_inedge(source_node, IR_node) @@ -586,7 +613,7 @@ def rename_Lambda(self, source_node): - def rename_BatchNormalization(self, keras_node): + def rename_BatchNormalization(self, keras_node): IR_node = self.IR_graph.node.add() # name, op @@ -597,7 +624,7 @@ def rename_BatchNormalization(self, keras_node): # axis IR_node.attr['axis'].i = keras_node.keras_layer.axis - + IR_node.attr['scale'].b = keras_node.keras_layer.scale IR_node.attr['bias'].b = keras_node.keras_layer.center @@ -627,7 +654,7 @@ def rename_BatchNormalization(self, keras_node): def rename_ZeroPadding2D(self, keras_node): IR_node = self.IR_graph.node.add() - self._convert_padding_api(keras_node, IR_node, "CONSTANT") + self._convert_padding_api(keras_node, IR_node, "constant") def rename_SeparableConv2D(self, source_node): @@ -639,4 +666,4 @@ def rename_DepthwiseConv2D(self, source_node): def custom_relu6(x): - return _keras.relu(x, max_value = 6) + return _keras.relu(x, max_value=6) diff --git a/mmdnn/conversion/pytorch/pytorch_emitter.py b/mmdnn/conversion/pytorch/pytorch_emitter.py index 2b4901ff..1b732414 100644 --- a/mmdnn/conversion/pytorch/pytorch_emitter.py +++ b/mmdnn/conversion/pytorch/pytorch_emitter.py @@ -11,9 +11,10 @@ import mmdnn.conversion.common.IR.graph_pb2 as graph_pb2 from mmdnn.conversion.common.IR.graph_pb2 import NodeDef, GraphDef, DataType from mmdnn.conversion.common.DataStructure.emitter import Emitter +from mmdnn.conversion.common.utils import * class PytorchEmitter(Emitter): - + dtype_map = { graph_pb2.DT_FLOAT16 : "float16", graph_pb2.DT_FLOAT32 : "float32", @@ -26,7 +27,7 @@ class PytorchEmitter(Emitter): } # Base Functions - def __init__(self, model): + def __init__(self, model): super(PytorchEmitter, self).__init__() if isinstance(model, _string_types): network_path = model @@ -65,7 +66,7 @@ def header_code(self): def load_weights(weight_file): if weight_file == None: return - + try: weights_dict = np.load(weight_file).item() except: @@ -73,7 +74,7 @@ def load_weights(weight_file): return weights_dict -class KitModel(nn.Module): +class KitModel(nn.Module): """ def gen_code(self, phase): @@ -83,10 +84,10 @@ def __init__(self, weight_file): global __weights_dict __weights_dict = load_weights(weight_file) """) - + self.add_body(1, "def forward(self, x):") - for layer in self.IR_graph.topological_sort: + for layer in self.IR_graph.topological_sort: current_node = self.IR_graph.get_node(layer) node_type = current_node.type @@ -105,89 +106,97 @@ def __init__(self, weight_file): for i in self.used_layers: func = getattr(self, "_layer_" + i) func() - - return self.header_code + '\n' + self.init_code + '\n' + self.body_codes - - def emit_Convolution(self, IR_node): - # https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/conv.py - self.used_layers.add(IR_node.type) + return self.header_code + '\n' + self.init_code + '\n' + self.body_code - dim = len(IR_node.IR_layer.attr["strides"].list.i) - 2 - in_channels = IR_node.IR_layer.attr["filter"].list.i[-2] - - filter = IR_node.IR_layer.attr["filter"].list.i[-1] - - kernel = IR_node.IR_layer.attr["filter"].list.i[:-2] + def _defuse_padding(self, IR_node, extra_str = ""): + input_node = self.parent_variable_name(IR_node) + if IR_node.get_attr('auto_pad') == 'VALID': + return input_node - strides = 
IR_node.IR_layer.attr["strides"].list.i[1:-1] + if is_valid_padding(IR_node.get_attr("pads")) == True: + return input_node - use_bias = IR_node.IR_layer.attr["use_bias"].b + padding = self._convert_padding(IR_node) + input_node = IR_node.variable_name + '_pad' + self.add_body(2, "{:<15} = F.pad({}, {}{})".format( + input_node, + self.parent_variable_name(IR_node), + padding, + extra_str + )) - if IR_node.IR_layer.attr["padding"].s == b'VALID': - padding = 0 - else: - padding = 1 - - self.add_init(2, "self.{} = self.conv({}, name = '{}', in_channels = {}, out_channels = {}, kernel_size = ({}), stride = ({}), padding = {}, bias = {})".format( + return input_node + + + def emit_Conv(self, IR_node): + # https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/conv.py + self.used_layers.add(IR_node.type) + + dim = len(IR_node.get_attr('strides')) - 2 + + in_channels = IR_node.get_attr('kernel_shape')[-2] + filter = IR_node.get_attr('kernel_shape')[-1] + kernel = IR_node.get_attr('kernel_shape')[:-2] + strides = IR_node.get_attr('strides')[1:-1] + + self.add_init(2, "self.{} = self.__conv({}, name='{}', in_channels={}, out_channels={}, kernel_size=({}), stride=({}), bias={})".format( IR_node.variable_name, dim, - IR_node.name, - in_channels, - filter, - ','.join('%s' % id for id in kernel), - ','.join('%s' % id for id in strides), - padding, - use_bias)) - + IR_node.name, + in_channels, + filter, + ', '.join('%s' % id for id in kernel), + ', '.join('%s' % id for id in strides), + # padding, + IR_node.get_attr('use_bias'))) + + input_node = self._defuse_padding(IR_node) self.add_body(2, "{:<15} = self.{}({})".format( IR_node.variable_name, IR_node.variable_name, - self.IR_graph.get_node(IR_node.in_edges[0]).real_variable_name)) + input_node)) if self.weight_loaded: self.weights_dict[IR_node.name]['weights'] = np.transpose(self.weights_dict[IR_node.name]['weights'], [dim + 1, dim] + list(range(0, dim))) - - def emit_Pool(self, IR_node): - dim = len(IR_node.IR_layer.attr["strides"].list.i) - 2 - if IR_node.layer.attr['pooling_type'].s == b"MAX": + def emit_Pool(self, IR_node): + dim = len(IR_node.get_attr('strides')) - 2 + + if IR_node.get_attr('pooling_type') == "MAX": pool_name = "max_pool{}d".format(dim) - elif IR_node.layer.attr['pooling_type'].s == b"AVG": + exstr = ", value=float('-Inf')" + elif IR_node.get_attr('pooling_type') == "AVG": pool_name = "avg_pool{}d".format(dim) + exstr = "" else: assert False - + if IR_node.layer.attr['global_pooling'].b: self.add_body(2, "{:<15} = F.{}(input = {}, kernel_size = {}.size()[2:])".format( IR_node.variable_name, pool_name, self.parent_variable_name(IR_node), self.parent_variable_name(IR_node) - )) - + )) + else: - for e in IR_node.IR_layer.attr["dilation_rate"].list.i: + for e in IR_node.get_attr('dilations', []): assert e == 1 - - if IR_node.IR_layer.attr["padding"].s == b'VALID': - padding = 0 - else: - # Kit TODO: to handle padding - padding = 1 - pool_size = IR_node.IR_layer.attr['window_shape'].list.i[1:-1] - strides = IR_node.IR_layer.attr['strides'].list.i[1:-1] - - self.add_body(2, "{:<15} = F.{}(input = {}, kernel_size = ({}), stride = ({}), padding = {})".format( + pool_size = IR_node.get_attr('kernel_shape')[1:-1] + strides = IR_node.get_attr('strides')[1:-1] + + input_node = self._defuse_padding(IR_node, exstr) + self.add_body(2, "{:<15} = F.{}({}, kernel_size={}, stride={})".format( IR_node.variable_name, pool_name, - self.IR_graph.get_parent(IR_node.name, [0]).real_variable_name, - ','.join([str(id) for id in pool_size]), - 
','.join([str(id) for id in strides]), - padding)) + input_node, + tuple(pool_size), + tuple(strides) + )) def emit_UNKNOWN(self, IR_node): @@ -196,7 +205,7 @@ def emit_UNKNOWN(self, IR_node): def emit_DataInput(self, IR_node): # Ignore it in Pytorch - IR_node.real_name = 'x' + IR_node.real_name = 'x' def emit_Dropout(self, IR_node): @@ -223,7 +232,7 @@ def emit_FullyConnected(self, IR_node): in_features = 1 for i in self.IR_graph.get_parent(IR_node.name, [0]).layer.attr['_output_shapes'].list.shape[0].dim[1:]: in_features *= i.size - + self.add_init(2, "self.{} = self.__dense(name = '{}', in_features = {}, out_features = {}, bias = {})".format( IR_node.variable_name, IR_node.name, @@ -231,10 +240,13 @@ def emit_FullyConnected(self, IR_node): IR_node.layer.attr["units"].i, IR_node.IR_layer.attr["use_bias"].b)) + input_node = self.parent_variable_name(IR_node) + if len(self.IR_graph.get_parent(IR_node.name, [0]).get_attr('_output_shapes')[0].dim) > 2: + input_node = "{}.view({}.size(0), -1)".format(input_node, input_node) self.add_body(2, "{:<15} = self.{}({})".format( IR_node.variable_name, - IR_node.variable_name, - self.IR_graph.get_parent(IR_node.name, [0]).real_variable_name)) + IR_node.variable_name, + input_node)) if self.weight_loaded: self.check_if_need_transpose(IR_node) @@ -250,15 +262,17 @@ def emit_Flatten(self, IR_node): def emit_Reshape(self, IR_node): - shape_str = IRGraph.shapeToStr(IR_node.IR_layer.attr["shape"].shape, True) + raise NotImplementedError + shape_str = IRGraph.shapeToStr(IR_node.IR_layer.attr["shape"].shape, True) self.add_body(1, "{:<15} = Reshape(name = \"{}\", target_shape = ({}))({})".format( IR_node.variable_name, - IR_node.name, - shape_str, + IR_node.name, + shape_str, self.IR_graph.get_node(IR_node.in_edges[0]).real_variable_name)) def emit_Tanh(self, IR_node): + raise NotImplementedError() code = "{:<15} = Activation(name = '{}', activation = 'tanh')({})".format( IR_node.replace_scope(IR_node.name), IR_node.name, @@ -267,20 +281,20 @@ def emit_Tanh(self, IR_node): def emit_Relu(self, IR_node): - self.add_body(2, "{:<15} = F.relu({})".format( - IR_node.variable_name, + self.add_body(2, "{:<15} = F.relu({})".format( + IR_node.variable_name, self.IR_graph.get_parent(IR_node.name, [0]).real_variable_name)) - + def emit_Softmax(self, IR_node): - self.add_body(2, "{:<15} = F.softmax({})".format( - IR_node.variable_name, + self.add_body(2, "{:<15} = F.softmax({})".format( + IR_node.variable_name, self.IR_graph.get_parent(IR_node.name, [0]).real_variable_name)) def emit_Sigmoid(self, IR_node): code = "{:<15} = Activation(name = '{}', activation = 'sigmoid')({})".format( - IR_node.replace_scope(IR_node.name), + IR_node.replace_scope(IR_node.name), IR_node.name, IR_node.replace_scope(IR_node.in_edges[0])) return code @@ -289,7 +303,7 @@ def emit_Sigmoid(self, IR_node): def emit_Embedding(self, IR_node): raise NotImplementedError() ret = "{:<15} = Embedding(input_dim = {}, output_dim = {}, mask_zero = {})({})".format( - IR_node.name, + IR_node.name, IR_node.IR_layer.attr['input_dim'].i, IR_node.IR_layer.attr['output_dim'].i, IR_node.IR_layer.attr['mask_zero'].b, @@ -307,9 +321,9 @@ def emit_RNNs(self, IR_node, func): IR_node.IR_layer.attr['recurrent_dropout'].f) else: dropout_str = "" - + code = "{:<15} = {}(units = {}, use_bias = {} {})({})".format( - IR_node.name, + IR_node.name, func, IR_node.IR_layer.attr['units'].i, IR_node.IR_layer.attr['use_bias'].b, @@ -333,11 +347,23 @@ def emit_Add(self, IR_node): '+ '.join('%s' % 
self.IR_graph.get_node(s).real_variable_name for s in IR_node.in_edges))) + @staticmethod + def _convert_axis(IR_node, axis): + ndim = len(IR_node.get_attr('_output_shapes')[0].dim) + if axis == 0: + return 0 + elif axis == ndim - 1: + return 1 + else: + return axis + 1 + + def emit_Concat(self, IR_node): + axis = self._convert_axis(IR_node, IR_node.get_attr('axis')) self.add_body(2, "{:<15} = torch.cat(({}), {})".format( IR_node.variable_name, ', '.join(self.IR_graph.get_node(s).real_variable_name for s in IR_node.in_edges), - IR_node.layer.attr['axis'].i, + axis, )) @@ -346,7 +372,7 @@ def emit_BatchNorm(self, IR_node): dim = len(IR_node.layer.attr['_output_shapes'].list.shape[0].dim) - 2 self.add_init(2, "self.{} = self.__batch_normalization({}, '{}', num_features = {}, eps = {}, momentum = {})".format( - IR_node.variable_name, + IR_node.variable_name, dim, IR_node.name, IR_node.layer.attr['_output_shapes'].list.shape[0].dim[-1].size, @@ -367,26 +393,48 @@ def emit_Squeeze(self, IR_node): )) - def emit_Pad(self, IR_node): - if IR_node.layer.attr['mode'].s == b'CONSTANT': + @staticmethod + def _convert_padding(IR_node): + padding = IR_node.get_attr('pads') + padding = convert_onnx_pad_to_tf(padding)[1:-1] + new_padding = [] + for pad in padding: + new_padding.insert(0, pad) + return tuple(np.array(new_padding).reshape(-1).tolist()) + + + def emit_Pad(self, IR_node): + if IR_node.get_attr('mode') == 'constant': mode = "mode = 'constant', value = {}".format(0) - elif IR_node.layer.attr['mode'].s == b'REFLECT': + elif IR_node.get_attr('mode') == 'reflect': mode = "mode = 'reflect'" - elif IR_node.layer.attr['mode'].s == b'SYMMETRIC': + elif IR_node.get_attr('mode') == 'SYMMETRIC': mode = "mode = 'replicate'" else: assert False - padding_str = ', '.join('%s' % i for i in IR_node.layer.attr['paddings'].list.i[2:-2]) - - self.add_body(2, "{:<15} = F.pad({}, ({}), {})".format( + padding = self._convert_padding(IR_node) + self.add_body(2, "{:<15} = F.pad({}, {}, {})".format( IR_node.variable_name, - self.parent_variable_name(IR_node), - padding_str, + self.parent_variable_name(IR_node), + padding, mode)) - def emit_LRN(self, IR_node): + def emit_ReduceMean(self, IR_node): + axes = [self._convert_axis(IR_node, x) for x in IR_node.get_attr('axes')] + input_node = self.parent_variable_name(IR_node) + for axis in sorted(axes, reverse=True): + self.add_body(2, "{:<15} = torch.mean({}, {}, {})".format( + IR_node.variable_name, + input_node, + axis, + IR_node.get_attr("keepdims") + )) + input_node = IR_node.variable_name + + + def emit_LRN(self, IR_node): self.used_layers.add(IR_node.type) self.add_body(2, "{:<15} = self.LRN(size = {}, alpha = {}, beta = {})({})".format( IR_node.variable_name, @@ -394,18 +442,18 @@ def emit_LRN(self, IR_node): IR_node.layer.attr['alpha'].f, IR_node.layer.attr['beta'].f, self.parent_variable_name(IR_node) - )) + )) - def _layer_Convolution(self): + def _layer_Conv(self): self.add_body(0, """ @staticmethod - def conv(dim, name, **kwargs): + def __conv(dim, name, **kwargs): if dim == 1: layer = nn.Conv1d(**kwargs) elif dim == 2: layer = nn.Conv2d(**kwargs) elif dim == 3: layer = nn.Conv3d(**kwargs) else: raise NotImplementedError() - + layer.state_dict()['weight'].copy_(torch.from_numpy(__weights_dict[name]['weights'])) if 'bias' in __weights_dict[name]: layer.state_dict()['bias'].copy_(torch.from_numpy(__weights_dict[name]['bias'])) @@ -431,7 +479,7 @@ def __batch_normalization(dim, name, **kwargs): elif dim == 2: layer = nn.BatchNorm2d(**kwargs) elif dim == 3: layer = 
nn.BatchNorm3d(**kwargs) else: raise NotImplementedError() - + if 'scale' in __weights_dict[name]: layer.state_dict()['weight'].copy_(torch.from_numpy(__weights_dict[name]['scale'])) else: @@ -454,16 +502,16 @@ def __init__(self, size=1, alpha=1.0, beta=0.75, ACROSS_CHANNELS=False): super(KitModel.LRN, self).__init__() self.ACROSS_CHANNELS = ACROSS_CHANNELS if self.ACROSS_CHANNELS: - self.average=nn.AvgPool3d(kernel_size=(size, 1, 1), + self.average=nn.AvgPool3d(kernel_size=(size, 1, 1), stride=1, - padding=(int((size-1.0)/2), 0, 0)) + padding=(int((size-1.0)/2), 0, 0)) else: self.average=nn.AvgPool2d(kernel_size=size, stride=1, padding=int((size-1.0)/2)) self.alpha = alpha self.beta = beta - + def forward(self, x): if self.ACROSS_CHANNELS: div = x.pow(2).unsqueeze(1) @@ -474,4 +522,4 @@ def forward(self, x): div = self.average(div) div = div.mul(self.alpha).add(1.0).pow(self.beta) x = x.div(div) - return x""") \ No newline at end of file + return x""") diff --git a/mmdnn/conversion/tensorflow/tensorflow_emitter.py b/mmdnn/conversion/tensorflow/tensorflow_emitter.py index 169e4c7d..bd45afce 100644 --- a/mmdnn/conversion/tensorflow/tensorflow_emitter.py +++ b/mmdnn/conversion/tensorflow/tensorflow_emitter.py @@ -9,10 +9,11 @@ import mmdnn.conversion.common.IR.graph_pb2 as graph_pb2 from mmdnn.conversion.common.IR.graph_pb2 import NodeDef, GraphDef, DataType from mmdnn.conversion.common.DataStructure.emitter import Emitter +from mmdnn.conversion.common.utils import * class TensorflowEmitter(Emitter): - + dtype_map = { graph_pb2.DT_FLOAT16 : "tf.float16", graph_pb2.DT_FLOAT32 : "tf.float32", @@ -22,7 +23,8 @@ class TensorflowEmitter(Emitter): graph_pb2.DT_INT64 : "tf.int64", graph_pb2.DT_UINT8 : "tf.uint8", graph_pb2.DT_UINT16 : "tf.uint16" - } + } + @property def header_code(self): @@ -34,10 +36,10 @@ def header_code(self): def load_weights(weight_file): import numpy as np - + if weight_file == None: return - + try: weights_dict = np.load(weight_file).item() except: @@ -46,7 +48,7 @@ def load_weights(weight_file): return weights_dict -def KitModel(weight_file = None): +def KitModel(weight_file = None): global __weights_dict __weights_dict = load_weights(weight_file) """.format(self.trainable) @@ -54,17 +56,17 @@ def KitModel(weight_file = None): def __init__(self, model): super(TensorflowEmitter, self).__init__() - + from six import string_types as _string_types if isinstance(model, _string_types): network_path = model else: network_path = model[0] self._load_weights(model[1]) - + self.IR_graph = IRGraph(network_path) super(TensorflowEmitter, self)._build() - + def gen_code(self, phase): self.trainable = (phase == 'train') @@ -81,7 +83,7 @@ def gen_code(self, phase): print("TensorflowEmitter has not supported operator [%s]." 
% (node_type)) self.emit_UNKNOWN(current_node) - self.add_body(1, "return {}, {}\n".format( + self.add_body(1, "return {}, {}".format( ', '.join([self.IR_graph.get_node(name).real_variable_name for name in self.IR_graph.input_layers]), ', '.join([self.IR_graph.get_node(name).real_variable_name for name in self.IR_graph.output_layers]))) @@ -95,26 +97,52 @@ def gen_code(self, phase): @staticmethod def _shapeToStr(shapes): - ret = [dim.size if dim.size != -1 else 'None' for dim in shapes.dim] + ret = [dim.size if dim.size != -1 else 'None' for dim in shapes.dim] return ', '.join('%s' % i for i in ret) - def emit_Convolution(self, IR_node): + def emit_Conv(self, IR_node): self.used_layers.add(IR_node.type) - strides_str = ', '.join('%s' % i for i in IR_node.layer.attr['strides'].list.i[1:-1]) - code = "{:<15} = convolution({}, strides = [{}], padding = '{}', name = '{}')".format( + strides_str = ', '.join('%s' % i for i in IR_node.get_attr('strides')[1:-1]) + input_node, padding = self._defuse_padding(IR_node) + self.add_body(1, "{:<15} = convolution({}, strides = [{}], padding = '{}', name = '{}')".format( IR_node.variable_name, - self.parent_variable_name(IR_node), + input_node, strides_str, - IR_node.layer.attr['padding'].s.decode('utf-8'), - IR_node.name) + padding, + IR_node.name)) + + + def _defuse_padding(self, IR_node): + auto_pad = IR_node.get_attr('auto_pad') + if auto_pad: + input_node = self.parent_variable_name(IR_node) + if auto_pad == 'VALID': + padding = 'VALID' + elif auto_pad.startswith("SAME"): + padding = 'SAME' + else: + assert False + return input_node, padding + + else: + padding = IR_node.get_attr("pads") + padding = convert_onnx_pad_to_tf(padding) + if is_valid_padding(padding) == False: + input_node = IR_node.variable_name + '_pad' + self.add_body(1, "{:<15} = tf.pad({}, paddings = {})".format( + input_node, + self.parent_variable_name(IR_node), + padding)) + else: + input_node = self.parent_variable_name(IR_node) + + return input_node, 'VALID' - self.add_body(1, code) - def emit_Pool(self, IR_node): op = 'max_pool' if IR_node.layer.attr['pooling_type'].s == b'MAX' else 'avg_pool' - arrlen = len(IR_node.layer.attr['strides'].list.i) + arrlen = len(IR_node.get_attr('strides')) dim_str = '3d' if arrlen == 5 else "" if IR_node.layer.attr['global_pooling'].b: @@ -126,19 +154,21 @@ def emit_Pool(self, IR_node): self.parent_variable_name(IR_node), arrlen, IR_node.name)) - - else: - kernel_shape_str = ', '.join('%s' % i for i in IR_node.layer.attr['window_shape'].list.i) - strides_str = ', '.join('%s' % i for i in IR_node.layer.attr['strides'].list.i) - - self.add_body(1, "{:<15} = tf.nn.{}{}({}, [{}], [{}], padding = '{}', name = '{}')".format( + + else: + kernel_shape_str = ', '.join('%s' % i for i in IR_node.get_attr('kernel_shape')) + strides_str = ', '.join('%s' % i for i in IR_node.get_attr('strides')) + + input_node, padding = self._defuse_padding(IR_node) + + self.add_body(1, "{:<15} = tf.nn.{}{}({}, [{}], [{}], padding='{}', name='{}')".format( IR_node.variable_name, op, dim_str, - self.parent_variable_name(IR_node), + input_node, kernel_shape_str, - strides_str, - IR_node.layer.attr['padding'].s.decode('utf-8'), + strides_str, + padding, IR_node.name)) @@ -154,11 +184,11 @@ def emit_DataInput(self, IR_node): dtype_str = "{}, ".format(self.dtype_map[IR_node.layer.attr['dtype'].type]) else: dtype_str = "tf.float32," - + code = "{:<15} = tf.placeholder({} shape = ({}), name = '{}')".format( IR_node.variable_name, dtype_str, shape_str, IR_node.name ) - + 
self.add_body(1, code) @@ -172,9 +202,9 @@ def emit_Dropout(self, IR_node): parent.real_variable_name)) else: IR_node.real_name = parent.real_name - - def emit_FullyConnected(self, IR_node): + + def emit_FullyConnected(self, IR_node): if IR_node.name in self.weights_dict and 'weights' in self.weights_dict[IR_node.name]: kernel_str = "kernel_initializer = tf.constant_initializer(__weights_dict['{}']['weights']), ".format(IR_node.name) else: kernel_str = "" @@ -200,13 +230,13 @@ def emit_Flatten(self, IR_node): self.parent_variable_name(IR_node))) - def emit_Reshape(self, IR_node): + def emit_Reshape(self, IR_node): self.add_body(1, "{:<15} = tf.reshape({}, [{}], '{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), - ', '.join('%s' % i for i in IR_node.layer.attr["shape"].list.i), + ', '.join('%s' % i for i in IR_node.get_attr('shape')), IR_node.name)) - + def _emit_unary_operation(self, IR_node, op_name): self.add_body(1, "{:<15} = tf.{}({}, name = '{}')".format( @@ -226,11 +256,11 @@ def emit_Elu(self, IR_node): def emit_Relu(self, IR_node): self._emit_unary_operation(IR_node, 'nn.relu') - + def emit_Relu6(self, IR_node): self._emit_unary_operation(IR_node, 'nn.relu6') - + def emit_CRelu(self, IR_node): self._emit_unary_operation(IR_node, 'nn.crelu') @@ -283,31 +313,31 @@ def emit_Concat(self, IR_node): def emit_BatchNorm(self, IR_node): self.used_layers.add(IR_node.type) - self.add_body(1, "{:<15} = batch_normalization({}, variance_epsilon = {}, name = '{}')".format( + self.add_body(1, "{:<15} = batch_normalization({}, variance_epsilon={}, name='{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), - IR_node.layer.attr['epsilon'].f, + IR_node.get_attr('epsilon'), IR_node.name)) def emit_Pad(self, IR_node): - padding_str = ', '.join('[%s, %s]' % - (IR_node.layer.attr['paddings'].list.i[idx], - IR_node.layer.attr['paddings'].list.i[idx + 1]) - for idx in range(0, len(IR_node.layer.attr['paddings'].list.i), 2)) - - mode_str = "" - if 'mode' in IR_node.layer.attr: - mode_str = ", mode = '{}'".format(IR_node.layer.attr['mode'].s.decode('utf-8')) - - code = "{:<15} = tf.pad({}, paddings = ({}){}, name = '{}')".format( + padding = IR_node.get_attr('pads') + padding = convert_onnx_pad_to_tf(padding) + + mode = IR_node.get_attr('mode', 'constant') + if mode == 'constant' or mode == 'reflect': + mode = mode.upper() + elif mode == 'edge': + mode = 'SYMMETRIC' + else: + raise NotImplementedError("Not support padding mode {}.".format(mode)) + + self.add_body(1, "{:<15} = tf.pad({}, {}, '{}', name='{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), - padding_str, - mode_str, - IR_node.variable_name - ) - self.add_body(1, code) + padding, + mode, + IR_node.variable_name)) def emit_Squeeze(self, IR_node): @@ -322,44 +352,46 @@ def emit_ReduceMean(self, IR_node): self.add_body(1, "{:<15} = tf.reduce_mean({}, [{}], {}, name = '{}')".format( IR_node.variable_name, self.parent_variable_name(IR_node), - ','.join('%s' % i for i in IR_node.layer.attr['axes'].list.i), - IR_node.layer.attr['keepdims'].b, + ','.join('%s' % i for i in IR_node.get_attr('axes')), + IR_node.get_attr('keepdims'), IR_node.name)) def emit_LRN(self, IR_node): self.add_body(1, "{:<15} = tf.nn.lrn({}, {}, alpha = {}, beta = {}, name = '{}')".format( - IR_node.variable_name, + IR_node.variable_name, self.parent_variable_name(IR_node), - IR_node.layer.attr['size'].i - 1, + IR_node.get_attr('size') - 1, IR_node.layer.attr['alpha'].f / (IR_node.layer.attr['size'].i * 2 - 1), - 
IR_node.layer.attr['beta'].f, + IR_node.get_attr('beta'), IR_node.name)) - + def emit_SeparableConv(self, IR_node): self.used_layers.add(IR_node.type) - strides_str = ', '.join('%s' % i for i in IR_node.layer.attr['strides'].list.i) + strides_str = ', '.join('%s' % i for i in IR_node.get_attr('strides')) + input_node, padding = self._defuse_padding(IR_node) self.add_body(1, "{:<15} = separable_convolution({}, strides = [{}], padding = '{}', name = '{}')".format( IR_node.variable_name, - self.parent_variable_name(IR_node), + input_node, strides_str, - IR_node.layer.attr['padding'].s.decode('utf-8'), + padding, IR_node.name)) def emit_DepthwiseConv(self, IR_node): self.used_layers.add(IR_node.type) strides_str = ', '.join('%s' % i for i in IR_node.layer.attr['strides'].list.i) + input_node, padding = self._defuse_padding(IR_node) self.add_body(1, "{:<15} = depthwise_convolution({}, strides = [{}], padding = '{}', name = '{}')".format( IR_node.variable_name, - self.parent_variable_name(IR_node), + input_node, strides_str, - IR_node.layer.attr['padding'].s.decode('utf-8'), + padding, IR_node.name)) - - def _layer_Convolution(self): + + def _layer_Conv(self): self.add_body(0, """ def convolution(input, name, **kwargs): w = tf.Variable(__weights_dict[name]['weights'], trainable = is_train, name = name + "_weight") @@ -396,7 +428,7 @@ def separable_convolution(input, name, **kwargs): def _layer_DepthwiseConv(self): self.add_body(0, """ def depthwise_convolution(input, name, **kwargs): - depthwise = tf.Variable(__weights_dict[name]['weights'], trainable = is_train, name = name + "_df") + depthwise = tf.Variable(__weights_dict[name]['weights'], trainable = is_train, name = name + "_df") layer = tf.nn.depthwise_conv2d(input, depthwise, **kwargs) if 'bias' in __weights_dict[name]: b = tf.Variable(__weights_dict[name]['bias'], trainable = is_train, name = name + "_bias") diff --git a/mmdnn/conversion/tensorflow/tensorflow_graph.py b/mmdnn/conversion/tensorflow/tensorflow_graph.py index 888b37b2..3472ee01 100644 --- a/mmdnn/conversion/tensorflow/tensorflow_graph.py +++ b/mmdnn/conversion/tensorflow/tensorflow_graph.py @@ -5,6 +5,7 @@ from mmdnn.conversion.common.DataStructure.graph import GraphNode, Graph from tensorflow.core.framework.node_def_pb2 import NodeDef +from tensorflow.core.framework import attr_value_pb2 class TensorflowGraphNode(GraphNode): @@ -33,7 +34,10 @@ def get_attr(self, name, default_value = None): attr = self.layer.attr[name] field = attr.WhichOneof('value') val = getattr(attr, field) if field else default_value - return val + if isinstance(val, attr_value_pb2.AttrValue.ListValue): + return list(val.ListFields()[0][1]) + else: + return val.decode('utf-8') if isinstance(val, bytes) else val else: return default_value diff --git a/mmdnn/conversion/tensorflow/tensorflow_parser.py b/mmdnn/conversion/tensorflow/tensorflow_parser.py index 9a92136f..8d462c84 100644 --- a/mmdnn/conversion/tensorflow/tensorflow_parser.py +++ b/mmdnn/conversion/tensorflow/tensorflow_parser.py @@ -3,13 +3,15 @@ # Licensed under the MIT License. See License.txt in the project root for license information. 
#---------------------------------------------------------------------------------------------- -import tensorflow import numpy as np +import tensorflow +from tensorflow.python.framework import tensor_util +from tensorflow.core.framework import attr_value_pb2 from mmdnn.conversion.tensorflow.tensorflow_graph import TensorflowGraph import mmdnn.conversion.common.IR.graph_pb2 as graph_pb2 from mmdnn.conversion.common.IR.graph_pb2 import NodeDef, GraphDef, DataType +from mmdnn.conversion.common.utils import * from mmdnn.conversion.common.DataStructure.parser import Parser -from tensorflow.python.framework import tensor_util class TensorflowParser(Parser): @@ -213,21 +215,58 @@ def _skip_node(cls, source_node): return False + @staticmethod + def tensor_shape_to_list(shapes): + if isinstance(shapes, attr_value_pb2.AttrValue): + return [dim.size for dim in shapes.shape.dim] + + else: + ret = [] + for shape in shapes: + this_one = [dim.size for dim in shape.dim] + ret.append(this_one) + return ret + + + def _convert_padding(self, source_node, IR_node, kernel_size): + # TODO: Fused conv and pool with padding is different from defused operators + input_node = self.get_parent(source_node.name, [0]) + input_shape = self.tensor_shape_to_list(input_node.get_attr('_output_shapes'))[0] + + if source_node.get_attr('padding') == 'VALID': + dims = len(input_shape) + assign_IRnode_values(IR_node, {'auto_pad' : "VALID", 'pads' : [0, 0] * dims}) + + elif source_node.get_attr('padding') == 'SAME': + padding = compute_tf_same_padding( + input_shape, + kernel_size, + source_node.get_attr('strides')) + assign_IRnode_values(IR_node, {'auto_pad' : "SAME_LOWER", 'pads' : padding}) + + else: + assert False + + def _convert_pooling(self, source_node, pool_type): - IR_node = self._convert_identity_operation(source_node, new_op = 'Pool') + IR_node = self._convert_identity_operation(source_node, new_op='Pool') + kwargs = {} # strides - IR_node.attr['strides'].list.i[:] = source_node.layer.attr['strides'].list.i[:] - - # padding - IR_node.attr['padding'].s = source_node.layer.attr['padding'].s + kwargs['strides'] = source_node.get_attr('strides') # window_shape - IR_node.attr['window_shape'].list.i[:] = source_node.layer.attr['ksize'].list.i[:] + kwargs['kernel_shape'] = source_node.get_attr('ksize') # pool type - IR_node.attr['pooling_type'].s = pool_type + kwargs['pooling_type'] = pool_type + + # padding + self._convert_padding(source_node, IR_node, kwargs['kernel_shape'][1:-1]) + + assign_IRnode_values(IR_node, kwargs) + def gen_IR(self): for layer in self.src_graph.topological_sort: current_node = self.src_graph.get_node(layer) @@ -250,8 +289,9 @@ def _copy_and_reop(source_node, IR_node, new_op = None): IR_node.name = source_node.name IR_node.op = new_op + kwargs = {} if 'data_format' in source_node.layer.attr: - IR_node.attr['data_format'].s = source_node.get_attr('data_format') + kwargs['data_format'] = source_node.get_attr('data_format') if 'dtype' in source_node.layer.attr: assert source_node.layer.attr['dtype'].type in TensorflowParser.dtype_map, 'type [{}] is unknown.'.format(source_node.layer.attr['dtype'].type) @@ -260,6 +300,8 @@ def _copy_and_reop(source_node, IR_node, new_op = None): if '_output_shapes' in source_node.layer.attr: IR_node.attr["_output_shapes"].MergeFromString(source_node.layer.attr['_output_shapes'].SerializeToString()) + assign_IRnode_values(IR_node, kwargs) + def _convert_inedge(self, source_node, IR_node, start_idx = 0, end_idx = None): if end_idx == None: end_idx = 
len(source_node.in_edges) @@ -268,7 +310,7 @@ def _convert_inedge(self, source_node, IR_node, start_idx = 0, end_idx = None): def _get_bias(self, source_node, IR_node): - if len(source_node.out_edges) < 1: + if not source_node.out_edges: return add_node = self.tf_graph.get_node(source_node.out_edges[0]) @@ -278,7 +320,7 @@ def _get_bias(self, source_node, IR_node): variable = self.tf_graph.get_node(add_node.in_edges[1]) variable = self.tf_graph.get_node(variable.in_edges[0]) - assert variable.layer.attr['shape'].shape.dim[0].size == IR_node.attr['filter'].list.i[-1] + assert variable.layer.attr['shape'].shape.dim[0].size == IR_node.attr['kernel_shape'].list.i[-1] if self.weight_loaded: assert variable.name in self.ckpt_data @@ -299,12 +341,13 @@ def _copy_shape(source_node, IR_node): def rename_UNKNOWN(self, source_node): if source_node.type in self.skip_type: return - print("Tensorflow has not supported operator [%s] with name [%s]." % (source_node.type, source_node.name)) + print("Tensorflow has not supported operator [%s] with name [%s]." + % (source_node.type, source_node.name)) return def rename_Placeholder(self, source_node): - IR_node = self._convert_identity_operation(source_node, new_op = 'DataInput') + IR_node = self._convert_identity_operation(source_node, new_op='DataInput') # shape TensorflowParser._copy_shape(source_node, IR_node) @@ -314,24 +357,27 @@ def rename_Conv2D(self, source_node): """ weights: name_weights, name_bias """ - IR_node = self._convert_identity_operation(source_node, 1, 'Convolution') + IR_node = self._convert_identity_operation(source_node, 1, 'Conv') - # strides - IR_node.attr['strides'].list.i[:] = source_node.layer.attr['strides'].list.i[:] + kwargs = {} - # padding - IR_node.attr['padding'].s = source_node.layer.attr['padding'].s + # strides + kwargs['strides'] = source_node.get_attr('strides') # input[1] : W # filter W = self.tf_graph.get_node(source_node.layer.input[1]) W = self.tf_graph.get_node(W.layer.input[0]).layer - for e in W.attr['shape'].shape.dim: - IR_node.attr['filter'].list.i.append(e.size) + kwargs['kernel_shape'] = self.tensor_shape_to_list(W.attr['shape']) + + # padding + self._convert_padding(source_node, IR_node, kwargs['kernel_shape'][:-2]) if self.weight_loaded: self.set_weight(source_node.name, 'weights', self.ckpt_data[W.name]) + assign_IRnode_values(IR_node, kwargs) + # output[0] : B self._get_bias(source_node, IR_node) @@ -378,7 +424,7 @@ def rename_Reshape(self, source_node): IR_node = self._convert_identity_operation(source_node, 1) # for target shape - IR_node.attr["shape"].shape.MergeFromString(source_node.layer.attr['_output_shapes'].list.shape[0].SerializeToString()) + IR_node.attr["shape"].shape.MergeFromString(source_node.get_attr('_output_shapes').shape[0].SerializeToString()) def rename_MatMul(self, source_node): @@ -472,16 +518,16 @@ def rename_QueueDequeueManyV2(self, source_node): def rename_Pad(self, source_node): IR_node = self._convert_identity_operation(source_node, 1, 'Pad') - IR_node.attr['mode'].s = b'CONSTANT' - IR_node.attr['constant_values'].f = 0.0 + kwargs = {} + kwargs['mode'] = 'constant' + kwargs['constant_values'] = 0.0 # paddings padding = self.get_parent(source_node.name, [1]).layer.attr['value'].tensor shapes = tensor_util.MakeNdarray(padding) - for i in shapes: - for j in i: - IR_node.attr['paddings'].list.i.append(j) + kwargs['pads'] = convert_tf_pad_to_onnx(shapes) + + assign_IRnode_values(IR_node, kwargs) def rename_Mean(self, source_node): @@ -497,16 +543,21 @@ def rename_ConcatV2(self, 
source_node): def rename_DepthwiseConv2dNative(self, source_node): IR_node = self._convert_identity_operation(source_node, 1, 'DepthwiseConv') - IR_node.attr['strides'].list.i[:] = source_node.layer.attr['strides'].list.i[:] - IR_node.attr['padding'].s = source_node.layer.attr['padding'].s + kwargs = {} + kwargs['strides'] = source_node.get_attr('strides') input_node = self.src_graph.get_parent(source_node.name, [1]) - IR_node.attr['filter'].list.i.extend([dim.size for dim in input_node.layer.attr['_output_shapes'].list.shape[0].dim]) + kwargs['kernel_shape'] = self.tensor_shape_to_list(input_node.get_attr('_output_shapes'))[0] + + self._convert_padding(source_node, IR_node, kwargs['kernel_shape'][:-2]) if self.weight_loaded: weight = self.src_graph.get_parent(source_node.name, [1, 0]) self.set_weight(source_node.name, 'weights', self.ckpt_data[weight.name]) + assign_IRnode_values(IR_node, kwargs) + + def rename_FusedBatchNorm(self, source_node): IR_node = self._convert_identity_operation(source_node, 1, 'BatchNorm') IR_node.attr['epsilon'].f = source_node.get_attr('epsilon', 0)
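
Note on the padding and axis bookkeeping introduced above: the parsers now store `pads` in the IR in the ONNX layout (all begin values followed by all end values, over NHWC dimensions), and the emitters convert that layout back to what `tf.pad` and `torch.nn.functional.pad` expect (see `convert_tf_pad_to_onnx`, `convert_onnx_pad_to_tf`, `_convert_padding`, and `_convert_axis` in the hunks above). The following standalone Python is a minimal sketch of those conventions, assuming that ONNX-style layout; the helper names here are illustrative only and are not part of the patch.

def onnx_pads_to_tf(pads):
    # [b0, ..., bn, e0, ..., en]  ->  [[b0, e0], ..., [bn, en]], the pair-per-dimension
    # layout consumed by tf.pad.
    n = len(pads) // 2
    return [[pads[i], pads[i + n]] for i in range(n)]

def onnx_pads_to_torch(pads):
    # Keep only the spatial pairs (drop batch and channel), then reverse their order,
    # because torch.nn.functional.pad expects (last_dim_begin, last_dim_end, ...),
    # innermost dimension first.
    pairs = onnx_pads_to_tf(pads)[1:-1]
    flat = []
    for begin, end in reversed(pairs):
        flat += [begin, end]
    return tuple(flat)

def nhwc_axis_to_nchw(axis, ndim):
    # Mirror of _convert_axis above: the IR records axes in NHWC order, while the
    # generated PyTorch code operates on NCHW tensors, so the channel axis moves
    # next to the batch axis and the spatial axes shift right by one.
    if axis == 0:
        return 0
    if axis == ndim - 1:
        return 1
    return axis + 1

if __name__ == '__main__':
    pads = [0, 1, 1, 0, 0, 1, 1, 0]   # 1-pixel padding on H and W of an NHWC tensor
    assert onnx_pads_to_tf(pads) == [[0, 0], [1, 1], [1, 1], [0, 0]]
    assert onnx_pads_to_torch(pads) == (1, 1, 1, 1)   # (w_begin, w_end, h_begin, h_end)
    assert nhwc_axis_to_nchw(axis=3, ndim=4) == 1     # channel concat maps to dim 1

This is also why `emit_Pool` passes value=float('-Inf') to the defused F.pad for max pooling: padding with the pooling op's identity value keeps the explicit pad followed by VALID pooling equivalent to the original SAME-padded pooling.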