I need display a jpeg picture, and convert it to YUV420SP. First I use SkBitmap to parse jpeg and display it, then I use the code below to convert RGB565 to YUV420SP on android, but it spend 75ms to convert a 640*480 RGB565 picture, so anybody know the faster way to convert RGB565 to YUV420SP on android? or faster way to convert jpeg file to YUV420SP on android?
// Convert from RGB to YUV420
int RGB2YUV_YR[256], RGB2YUV_YG[256], RGB2YUV_YB[256];
int RGB2YUV_UR[256], RGB2YUV_UG[256], RGB2YUV_UBVR[256];
int RGB2YUV_VG[256], RGB2YUV_VB[256];
//
// Table used for RGB to YUV420 conversion
//
void InitLookupTable()
{
static bool hasInited = false;
if(hasInited)
return ;
hasInited = true;
int i;
for (i = 0; i < 256; i++)
RGB2YUV_YR[i] = (float) 65.481 * (i << 8);
for (i = 0; i < 256; i++)
RGB2YUV_YG[i] = (float) 128.553 * (i << 8);
for (i = 0; i < 256; i++)
RGB2YUV_YB[i] = (float) 24.966 * (i << 8);
for (i = 0; i < 256; i++)
RGB2YUV_UR[i] = (float) 37.797 * (i << 8);
for (i = 0; i < 256; i++)
RGB2YUV_UG[i] = (float) 74.203 * (i << 8);
for (i = 0; i < 256; i++)
RGB2YUV_VG[i] = (float) 93.786 * (i << 8);
for (i = 0; i < 256; i++)
RGB2YUV_VB[i] = (float) 18.214 * (i << 8);
for (i = 0; i < 256; i++)
RGB2YUV_UBVR[i] = (float) 112 * (i << 8);
}
int ConvertRGB5652YUV420SP(int w, int h, unsigned char *bmp, unsigned char *yuv)
{
unsigned char *u, *v, *y, *uu, *vv;
unsigned char *pu1, *pu2, *pu3, *pu4;
unsigned char *pv1, *pv2, *pv3, *pv4;
unsigned char rValue = 0, gValue = 0, bValue = 0;
uint16_t* bmpPtr;
int i, j;
printf("ConvertRGB5652YUV420SP begin,w=%d,h=%d,bmp=%p,yuv=%p\n", w, h, bmp, yuv);
struct timeval tpstart,tpend;
gettimeofday(&tpstart,NULL);
InitLookupTable();
gettimeofday(&tpend,NULL);
float timeuse=1000000*(tpend.tv_sec-tpstart.tv_sec)+tpend.tv_usec-tpstart.tv_usec;
timeuse/=1000;
printf("InitLookupTable used time=%f\n", timeuse);
gettimeofday(&tpstart,NULL);
uu = new unsigned char[w * h];
vv = new unsigned char[w * h];
if (uu == NULL || vv == NULL || yuv == NULL)
return 0;
y = yuv;
u = uu;
v = vv;
// Get r,g,b pointers from bmp image data....
bmpPtr = (uint16_t*)bmp;
//Get YUV values for rgb values...
for (i = 0; i < h; i++) {
for (j = 0; j < w; j++) {
uint16_t color = *bmpPtr;
unsigned int r = (color>>11) & 0x1f;
unsigned int g = (color>> 5) & 0x3f;
unsigned int b = (color ) & 0x1f;
rValue = (r<<3) | (r>>2);
gValue = (g<<2) | (g>>4);
bValue = (b<<3) | (b>>2);
*y++ = (RGB2YUV_YR[rValue] + RGB2YUV_YG[gValue] + RGB2YUV_YB[bValue] +
1048576) >> 16;
*u++ = (-RGB2YUV_UR[rValue] - RGB2YUV_UG[gValue] + RGB2YUV_UBVR[bValue] +
8388608) >> 16;
*v++ = (RGB2YUV_UBVR[rValue] - RGB2YUV_VG[gValue] - RGB2YUV_VB[bValue] +
8388608) >> 16;
bmpPtr++;
}
}
gettimeofday(&tpend,NULL);
timeuse=1000000*(tpend.tv_sec-tpstart.tv_sec)+tpend.tv_usec-tpstart.tv_usec;
timeuse/=1000;
printf("Get YUV values used time=%f\n", timeuse);
gettimeofday(&tpstart,NULL);
// Now sample the U & V to obtain YUV 4:2:0 format
// Get the right pointers...
u = yuv + w * h;
v = u + 1;
// For U
pu1 = uu;
pu2 = pu1 + 1;
pu3 = pu1 + w;
pu4 = pu3 + 1;
// For V
pv1 = vv;
pv2 = pv1 + 1;
pv3 = pv1 + w;
pv4 = pv3 + 1;
// Do sampling....
for (i = 0; i < h; i += 2) {
for (j = 0; j < w; j += 2) {
*u = (*pu1 + *pu2 + *pu3 + *pu4) >> 2;
u += 2;
*v = (*pv1 + *pv2 + *pv3 + *pv4) >> 2;
v += 2;
pu1 += 2;
pu2 += 2;
pu3 += 2;
pu4 += 2;
pv1 += 2;
pv2 += 2;
pv3 += 2;
pv4 += 2;
}
pu1 += w;
pu2 += w;
pu3 += w;
pu4 += w;
pv1 += w;
pv2 += w;
pv3 += w;
pv4 += w;
}
gettimeofday(&tpend,NULL);
timeuse=1000000*(tpend.tv_sec-tpstart.tv_sec)+tpend.tv_usec-tpstart.tv_usec;
timeuse/=1000;
printf("Do sampling used time=%f\n", timeuse);
gettimeofday(&tpstart,NULL);
delete uu;
delete vv;
return 1;
}
int main(int argc, char **argv) {
unsigned char bmp[640*480*2] = {0};
unsigned char yuv[(640*480*3)/2] = {0};
struct timeval tpstart,tpend;
gettimeofday(&tpstart,NULL);
ConvertRGB5652YUV420SP(640, 480, bmp, yuv);
gettimeofday(&tpend,NULL);
float timeuse=1000000*(tpend.tv_sec-tpstart.tv_sec)+tpend.tv_usec-tpstart.tv_usec;
timeuse/=1000;
printf("ConvertARGB2YUV420SP used time=%f\n", timeuse);
return 0;
}
output on android(armv6):
ConvertRGB5652YUV420SP begin,w=640,h=480,bmp=0xbe7314fc,yuv=0xbe7c74fc
InitLookupTable used time=0.383000
Get YUV values used time=61.394001
Do sampling used time=11.918000
ConvertARGB2YUV420SP used time=74.596001
cpu info:
$ cat /proc/cpuinfo
cat /proc/cpuinfo
Processor : ARMv6-compatible processor rev 5 (v6l)
BogoMIPS : 791.34
Features : swp half thumb fastmult vfp edsp java
CPU implementer : 0x41
CPU architecture: 6TEJ
CPU variant : 0x1
CPU part : 0xb36
CPU revision : 5
Hardware : IMAPX200
Revision : 0000
Serial : 0000000000000000
On ARMv7, use NEON. It will do the job in less than 1ms. (VGA)
If you are stuck with ARMv6, optimize it in ARM assembly. (about 8ms on VGA)
Use fixed-point arithmetic instead of the lookup tables. Get rid of them.
make two masks :
then load two pixels at once into a 32bit register (which is a lot faster than 16bit read)
and red, mask1, pixel, lsr #11
and grn, mask2, pixel, lsr #5
and blu, mask1, pixel
now you have three registers, each containing two values - one in the lower, and the other in the upper 16 bits.
smulxy instructions will do some miracles from here on. (16bit multiply)
Good luck.
PS : your lookup table isn't that good either. Why are they all in length of 256? You could reduce them to 32 (r and b related) and 64 (g related) Which will increase the cache hit rate. Probably that will just do for the targeted 40ms without resorting to assembly. Yes, cache-misses are THAT painful.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With