Accuracy problem with softmax layer on ResNet #608

liuhao-97 · 2022-07-15T15:57:38Z

liuhao-97
Jul 15, 2022

Thank you for your great work!
I encountered accuracy loss when I deployed a ResNet with hls4ml. For the last output_dense layer, the hls4ml output is close to that of the Keras model, but for the output_softmax layer the hls4ml output is far from the Keras output.

Keras layer 'output_softmax', first sample:
[1.3628477e-04 1.8733885e-04 9.2392882e-05 9.4961268e-01 7.7491371e-05
 4.9028587e-02 2.0920817e-04 6.5380952e-04 1.2276657e-06 9.0956820e-07]
hls4ml layer 'output_softmax', first sample:
[0.        0.        0.        0.3330078 0.        0.3330078 0.
 0.3330078 0.        0.       ]

This is my model:

from tensorflow.keras import layers

# Subtracting pixel mean improves accuracy
subtract_pixel_mean = True

# Model parameter
# ----------------------------------------------------------------------------
#           |      | 200-epoch | Orig Paper| 200-epoch | Orig Paper| sec/epoch
# Model     |  n   | ResNet v1 | ResNet v1 | ResNet v2 | ResNet v2 | GTX1080Ti
#           |v1(v2)| %Accuracy | %Accuracy | %Accuracy | %Accuracy | v1 (v2)
# ----------------------------------------------------------------------------
# ResNet20  | 3 (2)| 92.16     | 91.25     | -----     | -----     | 35 (---)
# ResNet32  | 5(NA)| 92.46     | 92.49     | NA        | NA        | 50 ( NA)
# ResNet44  | 7(NA)| 92.50     | 92.83     | NA        | NA        | 70 ( NA)
# ResNet56  | 9 (6)| 92.71     | 93.03     | 93.01     | NA        | 90 (100)
# ResNet110 |18(12)| 92.65     | 93.39+-.16| 93.15     | 93.63     | 165(180)
# ResNet164 |27(18)| -----     | 94.07     | -----     | 94.54     | ---(---)
# ResNet1001| (111)| -----     | 92.39     | -----     | 95.08+-.14| ---(---)
# ---------------------------------------------------------------------------
n = 3

# Model version
# Orig paper: version = 1 (ResNet v1), Improved ResNet: version = 2 (ResNet v2)
version = 1

# Computed depth from supplied model parameter n:
# 6n+2 layers for ResNet v1, 9n+2 layers for ResNet v2.
if version == 1:
    depth = n * 6 + 2
elif version == 2:
    depth = n * 9 + 2
else:
    # Without this branch `depth` would stay undefined and the script would
    # fail later with an unrelated-looking NameError.
    raise ValueError(
        'version should be 1 (ResNet v1) or 2 (ResNet v2), got %r' % (version,))

# Model name, depth and version
model_type = 'ResNet%dv%d' % (depth, version)

# # Load the CIFAR10 data.
# # (x_train, y_train), (x_test, y_test) = cifar10.load_data()

# # Input image dimensions.
# input_shape = x_train.shape[1:]

# # Normalize data.
# x_train = x_train.astype('float32') / 255
# x_test = x_test.astype('float32') / 255

# # If subtract pixel mean is enabled
# if subtract_pixel_mean:
#     x_train_mean = np.mean(x_train, axis=0)
#     x_train -= x_train_mean
#     x_test -= x_train_mean

# print('x_train shape:', x_train.shape)
# print(x_train.shape[0], 'train samples')
# print(x_test.shape[0], 'test samples')
# print('y_train shape:', y_train.shape)

# # Convert class vectors to binary class matrices.
# y_train = keras.utils.to_categorical(y_train, num_classes)
# y_test = keras.utils.to_categorical(y_test, num_classes)


def lr_schedule(epoch):
    """Learning Rate Schedule

    Starts from a base rate of 1e-3 and scales it down once the epoch index
    exceeds 80, 120, 160 and 180. The chosen rate is printed and returned.
    Intended to be called once per epoch from a training callback.

    # Arguments
        epoch (int): The number of epochs

    # Returns
        lr (float32): learning rate
    """
    base_rate = 1e-3
    # (epoch threshold, scale factor), ordered from the latest threshold down
    # so the first match is the strongest applicable reduction.
    reductions = (
        (180, 0.5e-3),
        (160, 1e-3),
        (120, 1e-2),
        (80, 1e-1),
    )

    lr = base_rate
    for threshold, factor in reductions:
        if epoch > threshold:
            lr = base_rate * factor
            break

    print('Learning rate: ', lr)
    return lr


def resnet_layer(inputs,
                 num_filters=16,
                 kernel_size=3,
                 strides=1,
                 activation='relu',
                 batch_normalization=True,
                 conv_first=True, indx=0):
    """2D Convolution-Batch Normalization-Activation stack builder

    The convolution always uses 'same' padding, 'he_normal' kernel
    initialization and an l2(1e-4) kernel regularizer.

    # Arguments
        inputs (tensor): input tensor from input image or previous layer
        num_filters (int): Conv2D number of filters
        kernel_size (int): Conv2D square kernel dimensions
        strides (int): Conv2D square stride dimensions
        activation (string): activation name, or None to skip the activation
        batch_normalization (bool): whether to include batch normalization
        conv_first (bool): conv-bn-activation (True) or
            bn-activation-conv (False)
        indx (int): index embedded in the Conv2D layer name, which is
            'conv<indx>_1' when conv_first is True and 'conv<indx>_2'
            otherwise

    # Returns
        x (tensor): tensor as input to the next layer
    """
    def _convolve(tensor, layer_name):
        # The Conv2D layer is created at the point of use so that layers are
        # instantiated in the same order as they are applied.
        return Conv2D(num_filters,
                      kernel_size=kernel_size,
                      strides=strides,
                      padding='same',
                      kernel_initializer='he_normal',
                      kernel_regularizer=l2(1e-4),
                      name=layer_name)(tensor)

    def _normalize_and_activate(tensor):
        # Optional batch normalization followed by the optional activation.
        if batch_normalization:
            tensor = BatchNormalization()(tensor)
        if activation is not None:
            tensor = Activation(activation)(tensor)
        return tensor

    if conv_first:
        return _normalize_and_activate(
            _convolve(inputs, 'conv{}_1'.format(indx)))
    return _convolve(
        _normalize_and_activate(inputs), 'conv{}_2'.format(indx))


def resnet_v1(input_shape, depth, num_classes=10):
    """ResNet Version 1 Model builder [a]

    Stacks of 2 x (3 x 3) Conv2D-BN-ReLU
    Last ReLU is after the shortcut connection.
    At the beginning of each stage, the feature map size is halved (downsampled)
    by a convolutional layer with strides=2, while the number of filters is
    doubled. Within each stage, the layers have the same number of filters and
    the same feature map sizes.
    Features maps sizes:
    stage 0: 32x32, 16
    stage 1: 16x16, 32
    stage 2:  8x8,  64
    The Number of parameters is approx the same as Table 6 of [a]:
    ResNet20 0.27M
    ResNet32 0.46M
    ResNet44 0.66M
    ResNet56 0.85M
    ResNet110 1.7M

    Convolution layers are numbered consecutively through the `indx` argument
    of resnet_layer, in the order they are created. The classifier head is
    named 'averagepool', 'flattenlayer', 'output_dense' and 'output_softmax'.

    # Arguments
        input_shape (tensor): shape of input image tensor
        depth (int): number of core convolutional layers
        num_classes (int): number of classes (CIFAR10 has 10)

    # Returns
        model (Model): Keras model instance

    # Raises
        ValueError: if depth is not of the form 6n+2
    """
    blocks_per_stack, leftover = divmod(depth - 2, 6)
    if leftover != 0:
        raise ValueError('depth should be 6n+2 (eg 20, 32, 44 in [a])')
    blocks_per_stack = int(blocks_per_stack)

    # Start model definition.
    inputs = Input(shape=input_shape)
    # Stem convolution; it uses resnet_layer's defaults (16 filters, indx 0),
    # so numbering of the remaining convolutions starts at 1.
    x = resnet_layer(inputs=inputs)
    indx = 1

    # Instantiate the stack of residual units; filters double per stack.
    for stack, stack_filters in enumerate((16, 32, 64)):
        for res_block in range(blocks_per_stack):
            # Only the first block of the second and third stack downsamples.
            downsample = stack > 0 and res_block == 0
            strides = 2 if downsample else 1

            shortcut = x
            y = resnet_layer(inputs=shortcut,
                             num_filters=stack_filters,
                             strides=strides,
                             indx=indx)
            y = resnet_layer(inputs=y,
                             num_filters=stack_filters,
                             activation=None,
                             indx=indx + 1)
            indx += 2

            if downsample:
                # linear projection residual shortcut connection to match
                # changed dims
                shortcut = resnet_layer(inputs=shortcut,
                                        num_filters=stack_filters,
                                        kernel_size=1,
                                        strides=strides,
                                        activation=None,
                                        batch_normalization=False,
                                        indx=indx)
                indx += 1

            x = keras.layers.add([shortcut, y])
            x = Activation('relu')(x)

    # Add classifier on top.
    # v1 does not use BN after last shortcut connection-ReLU
    pooled = AveragePooling2D(pool_size=8, name='averagepool')(x)
    flattened = Flatten(name='flattenlayer')(pooled)
    logits = Dense(num_classes,
                   activation=None,
                   kernel_initializer='he_normal',
                   name='output_dense')(flattened)
    outputs = Activation('softmax', name='output_softmax')(logits)

    # Instantiate model.
    return Model(inputs=inputs, outputs=outputs)


# Build the ResNet v1 model for the configured depth.
# NOTE(review): `input_shape` is only assigned in the commented-out
# data-loading code above, so it must be defined before this line runs
# (presumably (32, 32, 3) for CIFAR-10 images -- confirm).
model = resnet_v1(input_shape=input_shape, depth=depth)

# Compile with the initial rate from lr_schedule. `learning_rate` is the
# supported keyword of the Keras optimizers; `lr` is a deprecated alias.
model.compile(loss='categorical_crossentropy',
              optimizer=Adam(learning_rate=lr_schedule(0)),
              metrics=['accuracy'])

print("-----------------------------------")
print("Configuration")

# Configure the output rounding/saturation optimizer pass for Activation layers.
rounding_pass = hls4ml.model.optimizer.OutputRoundingSaturationMode
rounding_pass.layers = ['Activation']
rounding_pass.rounding_mode = 'AP_RND'
rounding_pass.saturation_mode = 'AP_SAT'

# Start from the per-layer ("name" granularity) configuration generated by hls4ml.
hls_config = hls4ml.utils.config_from_keras_model(model, granularity='name')
# Optional wider softmax lookup-table types (left disabled):
# hls_config ['LayerName']['output_softmax']['exp_table_t'] = 'ap_fixed<18,8>'
# hls_config ['LayerName']['output_softmax']['inv_table_t'] = 'ap_fixed<18,4>'


# For the lowest possible latency, each layer should have a maximum number of
# trainable parameters of 4096. This is due to fixed limits in the Vivado
# compiler, beyond which maximally unrolled (=parallel) compilation will fail.
# Staying below that limit allows strategy = 'latency' in the hls4ml part
# rather than strategy = 'resource', in turn resulting in lower latency.

# If one layer would have >4096 elements, we should set ['Strategy'] = 'Resource'
# for that layer, or increase the reuse factor by hand.
# Set the strategy, precision and reuse factor for the full model.
hls_config['Model'].update({
    'Strategy': 'Resource',
    'ReuseFactor': 64,
    'Precision': 'ap_fixed<32,16>',
})

# Create an entry for each layer, here you can for instance change the strategy for a layer to 'resource' 
# or increase the reuse factor individually for large layers.
# In this case, we designed the model to be small enough for a fully parallel implementation 
# so we use the latency strategy and reuse factor of 1 for all layers.
# for Layer in hls_config['LayerName'].keys():
#     hls_config['LayerName'][Layer]['Strategy'] = 'Resource'
#     hls_config['LayerName'][Layer]['ReuseFactor'] = 1

# Groups of convolution layers that share one reuse factor.
conv_list_1 = ['conv1_1', 'conv2_1','conv3_1', 'conv4_1','conv5_1', 'conv6_1']
conv_list_2 = ['conv10_1', 'conv11_1','conv12_1']
conv_list_3 = ['conv17_1', 'conv18_1','conv19_1', 'conv20_1']

# Per-layer ReuseFactor overrides; any layer not listed here gets 64.
reuse_factor_by_layer = {
    'conv0_1': 432,
    'conv7_1': 4608,
    'conv8_1': 9216,
    'conv9_1': 256,
    'conv13_1': 9216,
    'conv14_1': 18432,
    'conv15_1': 36864,
    'conv16_1': 256,
    'output_dense': 128,
    'output_softmax': 2,
}
reuse_factor_by_layer.update(dict.fromkeys(conv_list_1, 2304))
reuse_factor_by_layer.update(dict.fromkeys(conv_list_2, 9216))
reuse_factor_by_layer.update(dict.fromkeys(conv_list_3, 36864))

# Every layer uses the Resource strategy, ap_fixed<32,16> precision and has
# tracing enabled; only the reuse factor differs between layers.
# Disabled alternatives in the original script: Precision 'ap_fixed<16,6>' for
# conv7..conv12 and conv15..conv20, and Strategy 'Latency' for output_dense
# and output_softmax.
for Layer, layer_cfg in hls_config['LayerName'].items():
    layer_cfg['Strategy'] = 'Resource'
    layer_cfg['ReuseFactor'] = reuse_factor_by_layer.get(Layer, 64)
    layer_cfg['Precision'] = 'ap_fixed<32,16>'
    layer_cfg['Trace'] = True


# If you want the best numerical performance for high-accuracy models, enable
# the 'Stable' softmax strategy below; the default latency strategy is faster
# but numerically more unstable.
# hls_config['LayerName']['output_softmax']['Strategy'] = 'Stable'


# Show the final configuration.
plotting.print_dict(hls_config)
print("-----------------------------------")

Attached is my code.
resnet20_1.zip

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Accuracy problem with softmax layer on ResNet #608

{{title}}

Replies: 0 comments

Select a reply

Accuracy problem with softmax layer on ResNet #608

liuhao-97 Jul 15, 2022

this is my model:

Replies: 0 comments

liuhao-97
Jul 15, 2022