I have a semantic segmentation model using PyTorch. In order to participate in a competition, I am compiling test.py to an .exe file with PyInstaller and UPX. Although the resulting executable runs correctly, its size is nearly 800 MB. How do I make it smaller?

This is my test.py:
from torch import nn
from torch.autograd import Variable as V
from torch import Tensor
from torch import cuda
from torch import load
import cv2
import os
import numpy as np
from time import time
from networks.unet import Unet
# from networks.dunet import Dunet
# from networks.dinknet import LinkNet34, DinkNet34, DinkNet50, DinkNet101, DinkNet34_less_pool
# from networks.dinkbranch import DinkBranch50, DinkBranch34
BATCHSIZE_PER_CARD = 2
class TTAFrame():
    def __init__(self, net):
        self.net = net().cuda()
        self.net = nn.DataParallel(self.net, device_ids=range(cuda.device_count()))

    def test_one_img_from_path(self, path, evalmode=True):
        if evalmode:
            self.net.eval()
        batchsize = cuda.device_count() * BATCHSIZE_PER_CARD
        if batchsize >= 8:
            return self.test_one_img_from_path_1(path)
        elif batchsize >= 4:
            return self.test_one_img_from_path_2(path)
        elif batchsize >= 2:
            return self.test_one_img_from_path_4(path)

    def test_one_img_from_path_8(self, path):
        img = cv2.imread(path)  # .transpose(2,0,1)[None]
        img90 = np.array(np.rot90(img))
        img1 = np.concatenate([img[None], img90[None]])
        img2 = np.array(img1)[:, ::-1]
        img3 = np.array(img1)[:, :, ::-1]
        img4 = np.array(img2)[:, :, ::-1]

        img1 = img1.transpose(0, 3, 1, 2)
        img2 = img2.transpose(0, 3, 1, 2)
        img3 = img3.transpose(0, 3, 1, 2)
        img4 = img4.transpose(0, 3, 1, 2)

        img1 = V(Tensor(np.array(img1, np.float32) / 255.0 * 3.2 - 1.6).cuda())
        img2 = V(Tensor(np.array(img2, np.float32) / 255.0 * 3.2 - 1.6).cuda())
        img3 = V(Tensor(np.array(img3, np.float32) / 255.0 * 3.2 - 1.6).cuda())
        img4 = V(Tensor(np.array(img4, np.float32) / 255.0 * 3.2 - 1.6).cuda())

        maska = self.net.forward(img1).squeeze().cpu().data.numpy()
        maskb = self.net.forward(img2).squeeze().cpu().data.numpy()
        maskc = self.net.forward(img3).squeeze().cpu().data.numpy()
        maskd = self.net.forward(img4).squeeze().cpu().data.numpy()

        mask1 = maska + maskb[:, ::-1] + maskc[:, :, ::-1] + maskd[:, ::-1, ::-1]
        mask2 = mask1[0] + np.rot90(mask1[1])[::-1, ::-1]

        return mask2

    def test_one_img_from_path_4(self, path):
        img = cv2.imread(path)  # .transpose(2,0,1)[None]
        img90 = np.array(np.rot90(img))
        img1 = np.concatenate([img[None], img90[None]])
        img2 = np.array(img1)[:, ::-1]
        img3 = np.array(img1)[:, :, ::-1]
        img4 = np.array(img2)[:, :, ::-1]

        img1 = img1.transpose(0, 3, 1, 2)
        img2 = img2.transpose(0, 3, 1, 2)
        img3 = img3.transpose(0, 3, 1, 2)
        img4 = img4.transpose(0, 3, 1, 2)

        img1 = V(Tensor(np.array(img1, np.float32) / 255.0 * 3.2 - 1.6).cuda())
        img2 = V(Tensor(np.array(img2, np.float32) / 255.0 * 3.2 - 1.6).cuda())
        img3 = V(Tensor(np.array(img3, np.float32) / 255.0 * 3.2 - 1.6).cuda())
        img4 = V(Tensor(np.array(img4, np.float32) / 255.0 * 3.2 - 1.6).cuda())

        maska = self.net.forward(img1).squeeze().cpu().data.numpy()
        maskb = self.net.forward(img2).squeeze().cpu().data.numpy()
        maskc = self.net.forward(img3).squeeze().cpu().data.numpy()
        maskd = self.net.forward(img4).squeeze().cpu().data.numpy()

        mask1 = maska + maskb[:, ::-1] + maskc[:, :, ::-1] + maskd[:, ::-1, ::-1]
        mask2 = mask1[0] + np.rot90(mask1[1])[::-1, ::-1]

        return mask2

    def test_one_img_from_path_2(self, path):
        img = cv2.imread(path)  # .transpose(2,0,1)[None]
        img90 = np.array(np.rot90(img))
        img1 = np.concatenate([img[None], img90[None]])
        img2 = np.array(img1)[:, ::-1]
        img3 = np.concatenate([img1, img2])
        img4 = np.array(img3)[:, :, ::-1]

        img5 = img3.transpose(0, 3, 1, 2)
        img5 = np.array(img5, np.float32) / 255.0 * 3.2 - 1.6
        img5 = V(Tensor(img5).cuda())
        img6 = img4.transpose(0, 3, 1, 2)
        img6 = np.array(img6, np.float32) / 255.0 * 3.2 - 1.6
        img6 = V(Tensor(img6).cuda())

        maska = self.net.forward(img5).squeeze().cpu().data.numpy()  # .squeeze(1)
        maskb = self.net.forward(img6).squeeze().cpu().data.numpy()

        mask1 = maska + maskb[:, :, ::-1]
        mask2 = mask1[:2] + mask1[2:, ::-1]
        mask3 = mask2[0] + np.rot90(mask2[1])[::-1, ::-1]

        return mask3

    def test_one_img_from_path_1(self, path):
        img = cv2.imread(path)  # .transpose(2,0,1)[None]
        img90 = np.array(np.rot90(img))
        img1 = np.concatenate([img[None], img90[None]])
        img2 = np.array(img1)[:, ::-1]
        img3 = np.concatenate([img1, img2])
        img4 = np.array(img3)[:, :, ::-1]
        img5 = np.concatenate([img3, img4]).transpose(0, 3, 1, 2)
        img5 = np.array(img5, np.float32) / 255.0 * 3.2 - 1.6
        img5 = V(Tensor(img5).cuda())

        mask = self.net.forward(img5).squeeze().cpu().data.numpy()  # .squeeze(1)
        mask1 = mask[:4] + mask[4:, :, ::-1]
        mask2 = mask1[:2] + mask1[2:, ::-1]
        mask3 = mask2[0] + np.rot90(mask2[1])[::-1, ::-1]

        return mask3

    def load(self, path):
        self.net.load_state_dict(load(path))
#source = 'dataset/test/'
import sys

if len(sys.argv) < 2:
    arg1 = r'dataset/504/original'
else:
    arg1 = sys.argv[1]

# source = r'dataset/504/original'
source = arg1
source_path = os.path.join(os.getcwd(), source)
val = os.listdir(source_path)

solver = TTAFrame(Unet)
model_path = r'weights/log02_Unet.th'
solver.load(os.path.join(os.getcwd(), model_path))

tic = time()
target = r'submits/log02_baseline504'
target_path = os.path.join(os.getcwd(), target)
if not os.path.exists(target_path):
    os.makedirs(target_path)

for i, name in enumerate(val):
    if i % 10 == 0:
        print(i / 10, ' ', '%.2f' % (time() - tic))
    mask = solver.test_one_img_from_path(os.path.join(source_path, name))
    mask[mask > 4.0] = 255
    mask[mask <= 4.0] = 0
    mask = np.concatenate([mask[:, :, None], mask[:, :, None], mask[:, :, None]], axis=2)
    cv2.imwrite(target_path + r'/' + name[:-7] + 'mask.png', mask.astype(np.uint8))
This is the 'Unet' file:
from torch import autograd, cat
from torch import nn
class Unet(nn.Module):
    def __init__(self):
        super(Unet, self).__init__()

        self.down1 = self.conv_stage(3, 8)
        self.down2 = self.conv_stage(8, 16)
        self.down3 = self.conv_stage(16, 32)
        self.down4 = self.conv_stage(32, 64)
        self.down5 = self.conv_stage(64, 128)
        self.down6 = self.conv_stage(128, 256)
        self.down7 = self.conv_stage(256, 512)

        self.center = self.conv_stage(512, 1024)
        #self.center_res = self.resblock(1024)

        self.up7 = self.conv_stage(1024, 512)
        self.up6 = self.conv_stage(512, 256)
        self.up5 = self.conv_stage(256, 128)
        self.up4 = self.conv_stage(128, 64)
        self.up3 = self.conv_stage(64, 32)
        self.up2 = self.conv_stage(32, 16)
        self.up1 = self.conv_stage(16, 8)

        self.trans7 = self.upsample(1024, 512)
        self.trans6 = self.upsample(512, 256)
        self.trans5 = self.upsample(256, 128)
        self.trans4 = self.upsample(128, 64)
        self.trans3 = self.upsample(64, 32)
        self.trans2 = self.upsample(32, 16)
        self.trans1 = self.upsample(16, 8)

        self.conv_last = nn.Sequential(
            nn.Conv2d(8, 1, 3, 1, 1),
            nn.Sigmoid()
        )

        self.max_pool = nn.MaxPool2d(2)

        for m in self.modules():
            if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d):
                if m.bias is not None:
                    m.bias.data.zero_()

    def conv_stage(self, dim_in, dim_out, kernel_size=3, stride=1, padding=1, bias=True, useBN=False):
        if useBN:
            return nn.Sequential(
                nn.Conv2d(dim_in, dim_out, kernel_size=kernel_size, stride=stride, padding=padding, bias=bias),
                nn.BatchNorm2d(dim_out),
                #nn.LeakyReLU(0.1),
                nn.ReLU(),
                nn.Conv2d(dim_out, dim_out, kernel_size=kernel_size, stride=stride, padding=padding, bias=bias),
                nn.BatchNorm2d(dim_out),
                #nn.LeakyReLU(0.1),
                nn.ReLU(),
            )
        else:
            return nn.Sequential(
                nn.Conv2d(dim_in, dim_out, kernel_size=kernel_size, stride=stride, padding=padding, bias=bias),
                nn.ReLU(),
                nn.Conv2d(dim_out, dim_out, kernel_size=kernel_size, stride=stride, padding=padding, bias=bias),
                nn.ReLU()
            )

    def upsample(self, ch_coarse, ch_fine):
        return nn.Sequential(
            nn.ConvTranspose2d(ch_coarse, ch_fine, 4, 2, 1, bias=False),
            nn.ReLU()
        )

    def forward(self, x):
        conv1_out = self.down1(x)
        conv2_out = self.down2(self.max_pool(conv1_out))
        conv3_out = self.down3(self.max_pool(conv2_out))
        conv4_out = self.down4(self.max_pool(conv3_out))
        conv5_out = self.down5(self.max_pool(conv4_out))
        conv6_out = self.down6(self.max_pool(conv5_out))
        conv7_out = self.down7(self.max_pool(conv6_out))

        out = self.center(self.max_pool(conv7_out))
        #out = self.center_res(out)

        out = self.up7(cat((self.trans7(out), conv7_out), 1))
        out = self.up6(cat((self.trans6(out), conv6_out), 1))
        out = self.up5(cat((self.trans5(out), conv5_out), 1))
        out = self.up4(cat((self.trans4(out), conv4_out), 1))
        out = self.up3(cat((self.trans3(out), conv3_out), 1))
        out = self.up2(cat((self.trans2(out), conv2_out), 1))
        out = self.up1(cat((self.trans1(out), conv1_out), 1))

        out = self.conv_last(out)

        return out
The .exe that pyinstaller produces is a bit of a cheat: it does not compile your script, but bundles everything it needs (including the Python interpreter and every imported package, such as PyTorch) into one (or many) files, which is why the result is so large.

To really be Python-agnostic, you should convert your model to TorchScript (read about it here). You will then be able to run your module with C++ libtorch, without a Python interpreter.
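A minimal sketch of the export step, assuming the same Unet class and weights/log02_Unet.th checkpoint as in your test.py (the 1024x1024 dummy-input size and the "module." prefix stripping are assumptions about your checkpoint and image size, adjust as needed):

import torch
from networks.unet import Unet

net = Unet().cuda().eval()

# test.py loads the weights into an nn.DataParallel wrapper, so the saved keys are
# presumably prefixed with "module." -- strip that prefix to load into a bare Unet.
state = torch.load('weights/log02_Unet.th', map_location='cuda')
state = {k[len('module.'):] if k.startswith('module.') else k: v for k, v in state.items()}
net.load_state_dict(state)

# Trace the model with a dummy input and save a self-contained TorchScript file.
example = torch.rand(1, 3, 1024, 1024).cuda()
with torch.no_grad():
    traced = torch.jit.trace(net, example)
traced.save('unet_traced.pt')

The saved unet_traced.pt can then be loaded from C++ with torch::jit::load() and executed against libtorch alone, so no Python interpreter (and no PyInstaller bundle) is needed at inference time.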