ENet
Later in our review of the literature on segmentation models, we came across ENet (Efficient Neural Network), a real-time semantic segmentation network designed for efficient processing on embedded systems and mobile devices. It was developed by Adam Paszke, Abhishek Chaurasia, Sangpil Kim and Eugenio Culurciello (University of Warsaw and Purdue University) and first presented in the paper “ENet: A Deep Neural Network Architecture for Real-Time Semantic Segmentation”, published in 2016.
Why ENet?
Performance and Efficiency
Network | 1024x512 | 1280x720 | Parameters | Model Size |
---|---|---|---|---|
ENet | 20.4 ms | 32.9 ms | 0.36 M | 1.5 MB |
SegNet | 66.5 ms | 114.3 ms | 29.4 M | 117.8 MB |
A comparison of computational time, number of parameters and model size required for ENet and SegNet
The key advantages of ENet
- Fast response time (low inference latency)
- Low computational requirements
- Smaller memory footprint compared to existing models
- Maintains comparable accuracy.
Our project mainly focuses on deploying a reliable model to identify boxes in an industrial environment. With the viability of the model for this purpose in mind, a suggestion was made during a discussion with the supervisor to consider ENet, as it would allow us to deploy the model in edge environments where memory and processing power are constrained resources. The suggestion was later reviewed and approved by the supervisor, and it led to the transition from SegNet to ENet.
ENet Architecture
PyTorch Implementation
class InitialBlock(nn.Module):
    """ENet stem: a strided 3x3 convolution concatenated with a 2x2 max-pool.

    Both branches read the same input. Their outputs are joined on the
    channel axis, so the block emits ``in_channels + out_channels`` feature
    maps (16 with the defaults). For even input height/width both branches
    produce half-resolution maps; for odd sizes the two branches disagree by
    one pixel and the concatenation fails.

    Args:
        in_channels: channels of the input; also the channel count of the
            max-pool branch.
        out_channels: channels produced by the convolution branch.
    """

    # Initial block of the model:
    #            Input
    #           /     \
    #          /       \
    #   maxpool2d    conv2d-3x3
    #          \       /
    #           \     /
    #         concatenate

    def __init__(self, in_channels=3, out_channels=13):
        super().__init__()
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.conv = nn.Conv2d(
            in_channels, out_channels, kernel_size=3, stride=2, padding=1
        )
        # One PReLU slope per channel of the concatenated tensor. Deriving the
        # count from the arguments (instead of a literal 16) keeps the block
        # usable for any in/out channel combination; the defaults still
        # give 16.
        self.prelu = nn.PReLU(in_channels + out_channels)
        self.batchnorm = nn.BatchNorm2d(out_channels)

    def forward(self, x):
        """Return the activated concatenation of the conv and pool branches."""
        # Batch norm is applied to the convolution branch only.
        main = self.batchnorm(self.conv(x))
        side = self.maxpool(x)
        # concatenating on the channels axis
        merged = torch.cat((main, side), dim=1)
        return self.prelu(merged)
class UBNeck(nn.Module):
    """Upsampling bottleneck of ENet.

    The main branch projects the input to ``out_channels`` with a 1x1
    convolution and max-unpools it with the pooling indices supplied by the
    caller. The side branch is a 1x1 -> 3x3 (stride 2) -> 1x1 stack of
    transposed convolutions followed by 2D dropout. The two branches are
    summed and passed through the final activation.

    Args:
        in_channels: channels of the incoming feature map.
        out_channels: channels of the produced feature map.
        relu: if True ReLU is used as the activation, otherwise PReLU.
        projection_ratio: divisor applied to ``in_channels`` to obtain the
            width of the side branch's inner layers.
    """

    # Upsampling bottleneck:
    #        Bottleneck Input
    #          /        \
    #         /          \
    #  conv2d-1x1     convTrans2d-1x1
    #      |             | PReLU
    #      |          convTrans2d-3x3
    #      |             | PReLU
    #      |          convTrans2d-1x1
    #      |             |
    #  maxunpool2d    Regularizer
    #         \          /
    #          \        /
    #       Summing + PReLU

    def __init__(self, in_channels, out_channels, relu=False, projection_ratio=4):
        super().__init__()
        # Define class variables
        self.in_channels = in_channels
        self.reduced_depth = int(in_channels / projection_ratio)
        self.out_channels = out_channels

        def make_activation():
            # A fresh module per call: assigning one shared PReLU instance to
            # all three positions would tie their learnable slopes together.
            return nn.ReLU() if relu else nn.PReLU()

        self.unpool = nn.MaxUnpool2d(kernel_size=2, stride=2)
        self.main_conv = nn.Conv2d(
            in_channels=self.in_channels, out_channels=self.out_channels, kernel_size=1
        )
        self.dropout = nn.Dropout2d(p=0.1)
        self.convt1 = nn.ConvTranspose2d(
            in_channels=self.in_channels,
            out_channels=self.reduced_depth,
            kernel_size=1,
            padding=0,
            bias=False,
        )
        self.prelu1 = make_activation()
        # This layer is used for upsampling (stride 2 doubles height and width)
        self.convt2 = nn.ConvTranspose2d(
            in_channels=self.reduced_depth,
            out_channels=self.reduced_depth,
            kernel_size=3,
            stride=2,
            padding=1,
            output_padding=1,
            bias=False,
        )
        self.prelu2 = make_activation()
        self.convt3 = nn.ConvTranspose2d(
            in_channels=self.reduced_depth,
            out_channels=self.out_channels,
            kernel_size=1,
            padding=0,
            bias=False,
        )
        self.prelu3 = make_activation()
        self.batchnorm = nn.BatchNorm2d(self.reduced_depth)
        self.batchnorm2 = nn.BatchNorm2d(self.out_channels)

    def forward(self, x, indices):
        """Upsample ``x`` by two.

        Args:
            x: input feature map with ``in_channels`` channels.
            indices: max-pool indices handed to ``nn.MaxUnpool2d``; they must
                match the shape of the main branch after its 1x1 convolution.

        Returns:
            The sum of both branches after the final activation.
        """
        x_copy = x
        # Side Branch
        x = self.convt1(x)
        x = self.batchnorm(x)
        x = self.prelu1(x)
        x = self.convt2(x)
        # NOTE(review): the same BatchNorm2d is reused after convt1 and
        # convt2, so both positions share affine parameters and running
        # statistics. Left unchanged because a dedicated layer would add new
        # state_dict keys and break loading of existing checkpoints.
        x = self.batchnorm(x)
        x = self.prelu2(x)
        x = self.convt3(x)
        x = self.batchnorm2(x)
        x = self.dropout(x)
        # Main Branch
        x_copy = self.main_conv(x_copy)
        # Unpool to exactly the side branch's size so the sum below is valid.
        x_copy = self.unpool(x_copy, indices, output_size=x.size())
        # summing the main and side branches
        x = x + x_copy
        x = self.prelu3(x)
        return x
class RDDNeck(nn.Module):
    """Regular / dilated / downsampling bottleneck of ENet.

    The side branch is a 1x1 -> 3x3 -> 1x1 convolution stack followed by 2D
    dropout. The main branch is the input itself, max-pooled when
    ``down_flag`` is set and zero-padded on the channel axis when
    ``out_channels`` differs from ``in_channels``. Both branches are summed
    and passed through the final activation.

    Args:
        dilation: integer dilation rate of the 3x3 convolution; it is also
            used as that convolution's padding (1 means no dilation).
        in_channels: channels of the incoming feature map.
        out_channels: channels of the produced feature map.
        down_flag: if True the block downsamples by two (stride-2 3x3
            convolution plus max-pooling on the main branch) and ``forward``
            additionally returns the pooling indices.
        relu: if True ReLU is used as the activation, otherwise PReLU.
        projection_ratio: divisor used to derive the side branch's inner
            width (from ``in_channels`` when downsampling, otherwise from
            ``out_channels``).
        p: dropout probability of the regularizer.
    """

    # Regular|Dilated|Downsampling bottlenecks:
    #
    #        Bottleneck Input
    #          /        \
    #         /          \
    #  maxpooling2d    conv2d-1x1
    #      |             | PReLU
    #      |           conv2d-3x3
    #      |             | PReLU
    #      |           conv2d-1x1
    #      |             |
    #   Padding2d     Regularizer
    #         \          /
    #          \        /
    #       Summing + PReLU

    def __init__(
        self,
        dilation,
        in_channels,
        out_channels,
        down_flag,
        relu=False,
        projection_ratio=4,
        p=0.1,
    ):
        super().__init__()
        # Define class variables
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.dilation = dilation
        self.down_flag = down_flag
        # calculating the number of reduced channels
        if down_flag:
            self.stride = 2
            self.reduced_depth = int(in_channels // projection_ratio)
        else:
            self.stride = 1
            self.reduced_depth = int(out_channels // projection_ratio)

        def make_activation():
            # A fresh module per call: assigning one shared PReLU instance to
            # all three positions would tie their learnable slopes together.
            return nn.ReLU() if relu else nn.PReLU()

        self.maxpool = nn.MaxPool2d(
            kernel_size=2, stride=2, padding=0, return_indices=True
        )
        self.dropout = nn.Dropout2d(p=p)
        self.conv1 = nn.Conv2d(
            in_channels=self.in_channels,
            out_channels=self.reduced_depth,
            kernel_size=1,
            stride=1,
            padding=0,
            bias=False,
            dilation=1,
        )
        self.prelu1 = make_activation()
        # padding == dilation keeps the spatial size unchanged at stride 1.
        self.conv2 = nn.Conv2d(
            in_channels=self.reduced_depth,
            out_channels=self.reduced_depth,
            kernel_size=3,
            stride=self.stride,
            padding=self.dilation,
            bias=True,
            dilation=self.dilation,
        )
        self.prelu2 = make_activation()
        self.conv3 = nn.Conv2d(
            in_channels=self.reduced_depth,
            out_channels=self.out_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            bias=False,
            dilation=1,
        )
        self.prelu3 = make_activation()
        self.batchnorm = nn.BatchNorm2d(self.reduced_depth)
        self.batchnorm2 = nn.BatchNorm2d(self.out_channels)

    def forward(self, x):
        """Run the bottleneck.

        Returns:
            ``(output, indices)`` when ``down_flag`` is True, where
            ``indices`` are the max-pool indices of the main branch;
            otherwise just ``output``.
        """
        bs = x.size()[0]
        x_copy = x
        # Side Branch
        x = self.conv1(x)
        x = self.batchnorm(x)
        x = self.prelu1(x)
        x = self.conv2(x)
        # NOTE(review): the same BatchNorm2d is reused after conv1 and conv2,
        # so both positions share affine parameters and running statistics.
        # Left unchanged because a dedicated layer would add new state_dict
        # keys and break loading of existing checkpoints.
        x = self.batchnorm(x)
        x = self.prelu2(x)
        x = self.conv3(x)
        x = self.batchnorm2(x)
        x = self.dropout(x)
        # Main Branch
        if self.down_flag:
            x_copy, indices = self.maxpool(x_copy)
        if self.in_channels != self.out_channels:
            out_shape = self.out_channels - self.in_channels
            # Zero-pad the channel axis so the main branch matches the side
            # branch. new_zeros allocates on x_copy's own device and dtype,
            # so this works wherever the model lives, rather than moving the
            # padding to CUDA whenever a GPU merely exists.
            extras = x_copy.new_zeros((bs, out_shape, x.shape[2], x.shape[3]))
            x_copy = torch.cat((x_copy, extras), dim=1)
        # Summing main and side branches
        x = x + x_copy
        x = self.prelu3(x)
        if self.down_flag:
            return x, indices
        return x
class ASNeck(nn.Module):
    """Asymmetric bottleneck of ENet.

    The side branch replaces the usual square convolution with a 1x5
    convolution followed by a 5x1 convolution, wrapped between two 1x1
    convolutions. The main branch is the input itself, zero-padded on the
    channel axis when ``out_channels`` differs from ``in_channels``. Both
    branches are summed and passed through a PReLU.

    Args:
        in_channels: channels of the incoming feature map.
        out_channels: channels of the produced feature map.
        projection_ratio: divisor applied to ``in_channels`` to obtain the
            width of the side branch's inner layers.
    """

    # Asymmetric bottleneck:
    #
    #        Bottleneck Input
    #          /        \
    #         /          \
    #        |         conv2d-1x1
    #        |           | PReLU
    #        |         conv2d-1x5
    #        |           |
    #        |         conv2d-5x1
    #        |           | PReLU
    #        |         conv2d-1x1
    #        |           |
    #    Padding2d    Regularizer
    #         \          /
    #          \        /
    #       Summing + PReLU

    def __init__(self, in_channels, out_channels, projection_ratio=4):
        super().__init__()
        # Define class variables
        self.in_channels = in_channels
        self.reduced_depth = int(in_channels / projection_ratio)
        self.out_channels = out_channels
        self.dropout = nn.Dropout2d(p=0.1)
        self.conv1 = nn.Conv2d(
            in_channels=self.in_channels,
            out_channels=self.reduced_depth,
            kernel_size=1,
            stride=1,
            padding=0,
            bias=False,
        )
        self.prelu1 = nn.PReLU()
        # 1x5 then 5x1: padding of 2 along the filtered axis keeps the
        # spatial size unchanged.
        self.conv21 = nn.Conv2d(
            in_channels=self.reduced_depth,
            out_channels=self.reduced_depth,
            kernel_size=(1, 5),
            stride=1,
            padding=(0, 2),
            bias=False,
        )
        self.conv22 = nn.Conv2d(
            in_channels=self.reduced_depth,
            out_channels=self.reduced_depth,
            kernel_size=(5, 1),
            stride=1,
            padding=(2, 0),
            bias=False,
        )
        self.prelu2 = nn.PReLU()
        self.conv3 = nn.Conv2d(
            in_channels=self.reduced_depth,
            out_channels=self.out_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            bias=False,
        )
        self.prelu3 = nn.PReLU()
        self.batchnorm = nn.BatchNorm2d(self.reduced_depth)
        self.batchnorm2 = nn.BatchNorm2d(self.out_channels)

    def forward(self, x):
        """Return the activated sum of the asymmetric side branch and the input."""
        bs = x.size()[0]
        x_copy = x
        # Side Branch
        x = self.conv1(x)
        x = self.batchnorm(x)
        x = self.prelu1(x)
        x = self.conv21(x)
        x = self.conv22(x)
        # NOTE(review): the same BatchNorm2d is reused here and after conv1,
        # so both positions share affine parameters and running statistics.
        x = self.batchnorm(x)
        x = self.prelu2(x)
        x = self.conv3(x)
        # NOTE(review): dropout runs before batchnorm2 here, whereas RDDNeck
        # and UBNeck apply batchnorm2 first; the order only matters in
        # training mode. Kept as in the original.
        x = self.dropout(x)
        x = self.batchnorm2(x)
        # Main Branch
        if self.in_channels != self.out_channels:
            out_shape = self.out_channels - self.in_channels
            # Zero-pad the channel axis so the main branch matches the side
            # branch. new_zeros allocates on x_copy's own device and dtype,
            # so this works wherever the model lives, rather than moving the
            # padding to CUDA whenever a GPU merely exists.
            extras = x_copy.new_zeros((bs, out_shape, x.shape[2], x.shape[3]))
            x_copy = torch.cat((x_copy, extras), dim=1)
        # Summing main and side branches
        x = x + x_copy
        x = self.prelu3(x)
        return x
class ENet(nn.Module):
    """Full ENet segmentation network assembled from the bottleneck blocks.

    Layout: initial block, two downsampling encoder stages (b1x, b2x), a
    third stage (b3x) that repeats stage two without its downsampling block,
    two upsampling decoder stages (b4x, b5x) that reuse the encoder's pooling
    indices, and a final transposed convolution that emits ``C`` channels.

    Args:
        C: number of classes (output channels of the final layer).
    """

    def __init__(self, C):
        super().__init__()
        # C - number of classes
        self.C = C

        def plain(width, rate=1, **extra):
            # Non-downsampling RDDNeck that keeps the channel count.
            return RDDNeck(
                dilation=rate,
                in_channels=width,
                out_channels=width,
                down_flag=False,
                **extra,
            )

        def asym(width):
            # Asymmetric bottleneck that keeps the channel count.
            return ASNeck(in_channels=width, out_channels=width)

        # Initial block
        self.init = InitialBlock()

        # Stage 1: downsample 16 -> 64, then four regular bottlenecks
        self.b10 = RDDNeck(
            dilation=1, in_channels=16, out_channels=64, down_flag=True, p=0.01
        )
        self.b11 = plain(64, p=0.01)
        self.b12 = plain(64, p=0.01)
        self.b13 = plain(64, p=0.01)
        self.b14 = plain(64, p=0.01)

        # Stage 2: downsample 64 -> 128, then regular/dilated/asymmetric mix
        self.b20 = RDDNeck(dilation=1, in_channels=64, out_channels=128, down_flag=True)
        self.b21 = plain(128)
        self.b22 = plain(128, rate=2)
        self.b23 = asym(128)
        self.b24 = plain(128, rate=4)
        self.b25 = plain(128)
        self.b26 = plain(128, rate=8)
        self.b27 = asym(128)
        self.b28 = plain(128, rate=16)

        # Stage 3: same sequence as stage 2 minus the downsampling block
        self.b31 = plain(128)
        self.b32 = plain(128, rate=2)
        self.b33 = asym(128)
        self.b34 = plain(128, rate=4)
        self.b35 = plain(128)
        self.b36 = plain(128, rate=8)
        self.b37 = asym(128)
        self.b38 = plain(128, rate=16)

        # Stage 4: upsample 128 -> 64 (decoder blocks use ReLU)
        self.b40 = UBNeck(in_channels=128, out_channels=64, relu=True)
        self.b41 = plain(64, relu=True)
        self.b42 = plain(64, relu=True)

        # Stage 5: upsample 64 -> 16
        self.b50 = UBNeck(in_channels=64, out_channels=16, relu=True)
        self.b51 = plain(16, relu=True)

        # Final transposed convolution producing one channel per class
        self.fullconv = nn.ConvTranspose2d(
            in_channels=16,
            out_channels=self.C,
            kernel_size=3,
            stride=2,
            padding=1,
            output_padding=1,
            bias=False,
        )

    def forward(self, x):
        """Run the encoder and decoder and return the ``C``-channel output."""
        x = self.init(x)

        # Stage 1 (keep the pooling indices for the last decoder stage)
        x, stage1_indices = self.b10(x)
        for block in (self.b11, self.b12, self.b13, self.b14):
            x = block(x)

        # Stage 2 (keep the pooling indices for the first decoder stage)
        x, stage2_indices = self.b20(x)
        stage2 = (
            self.b21, self.b22, self.b23, self.b24,
            self.b25, self.b26, self.b27, self.b28,
        )
        for block in stage2:
            x = block(x)

        # Stage 3
        stage3 = (
            self.b31, self.b32, self.b33, self.b34,
            self.b35, self.b36, self.b37, self.b38,
        )
        for block in stage3:
            x = block(x)

        # Stage 4: unpool with the indices saved in stage 2
        x = self.b40(x, stage2_indices)
        for block in (self.b41, self.b42):
            x = block(x)

        # Stage 5: unpool with the indices saved in stage 1
        x = self.b50(x, stage1_indices)
        x = self.b51(x)

        # Final ConvTranspose layer
        return self.fullconv(x)