I'm trying to optimize a private video player for Linux, because playing MP4 files is heavy on the CPU: the video frames are encoded in YV12 and OpenGL doesn't provide a native way to display this format. Right now there is code that runs on the CPU to convert YV12 to RGB before the image is sent to the GPU for display, and this consumes 100% of the CPU.
I'm currently investigating how to display YV12 frames without having to write a shader to do the YV12 -> RGB conversion. As far as I understand, one way to do this is through the GL_MESA_ycbcr_texture extension, which is apparently supported by my system (reported by glxinfo).
In this Fedora box I have an ATI Technologies Inc RV610 video device [Radeon HD 2400 PRO], which is a decent video card. I downloaded the yuvrect test and made a few changes to replace GL_TEXTURE_RECTANGLE_NV with a texture target that is supported by this card: GL_TEXTURE_RECTANGLE_ARB.
However, when I execute this modified application it outputs:
The MESA driver reports *unsupported texture format in setup_hardware_state*
I noticed that this error shows up when glPopMatrix(); is executed from the Display() callback. Now, this doesn't seem like a bug in my application, because I ran this exact same code on another Fedora box (same system) which has a different video card, an Intel Corporation Sandy Bridge Integrated Graphics Controller (rev 09), and it works beautifully.
The only visible difference between the 2 binaries are the libraries they are linked with. On the (problematic) ATI card ldd reports:
linux-gate.so.1 => (0x00da3000)
libGL.so.1 => /usr/lib/libGL.so.1 (0x077bd000)
libGLU.so.1 => /usr/lib/libGLU.so.1 (0x0783b000)
libglut.so.3 => /usr/lib/libglut.so.3 (0x005a9000)
libGLEW.so.1.5 => /usr/lib/libGLEW.so.1.5 (0x00aa3000)
libstdc++.so.6 => /usr/lib/libstdc++.so.6 (0x057e2000)
libm.so.6 => /lib/libm.so.6 (0x004e4000)
libgcc_s.so.1 => /lib/libgcc_s.so.1 (0x0053f000)
libc.so.6 => /lib/libc.so.6 (0x00358000)
libX11.so.6 => /usr/lib/libX11.so.6 (0x0071b000)
libXext.so.6 => /usr/lib/libXext.so.6 (0x009c5000)
libXdamage.so.1 => /usr/lib/libXdamage.so.1 (0x00af7000)
libXfixes.so.3 => /usr/lib/libXfixes.so.3 (0x00b76000)
libXxf86vm.so.1 => /usr/lib/libXxf86vm.so.1 (0x0014e000)
libdrm.so.2 => /usr/lib/libdrm.so.2 (0x00101000)
libpthread.so.0 => /lib/libpthread.so.0 (0x00510000)
libdl.so.2 => /lib/libdl.so.2 (0x0052d000)
libXi.so.6 => /usr/lib/libXi.so.6 (0x00110000)
/lib/ld-linux.so.2 (0x00337000)
libxcb.so.1 => /usr/lib/libxcb.so.1 (0x00859000)
librt.so.1 => /lib/librt.so.1 (0x00534000)
libXau.so.6 => /usr/lib/libXau.so.6 (0x00854000)
Meanwhile, on the Intel card you can see that it is linked with libv4l and some other libraries, while the ATI binary is not! I wonder if this has anything to do with the problem I'm facing:
linux-gate.so.1 => (0x008d6000)
/usr/lib/libv4l/v4l1compat.so (0x00345000)
libGL.so.1 => /usr/lib/libGL.so.1 (0x4fb85000)
libGLU.so.1 => /usr/lib/libGLU.so.1 (0x4fc10000)
libglut.so.3 => /usr/lib/libglut.so.3 (0x005a9000)
libGLEW.so.1.5 => /usr/lib/libGLEW.so.1.5 (0x4fc82000)
libstdc++.so.6 => /usr/lib/libstdc++.so.6 (0x42ca7000)
libm.so.6 => /lib/libm.so.6 (0x41fbc000)
libgcc_s.so.1 => /lib/libgcc_s.so.1 (0x42017000)
libc.so.6 => /lib/libc.so.6 (0x41e30000)
libv4l1.so.0 => /usr/lib/libv4l1.so.0 (0x00110000)
libX11.so.6 => /usr/lib/libX11.so.6 (0x421f8000)
libXext.so.6 => /usr/lib/libXext.so.6 (0x424c0000)
libXdamage.so.1 => /usr/lib/libXdamage.so.1 (0x42c0e000)
libXfixes.so.3 => /usr/lib/libXfixes.so.3 (0x42d98000)
libXxf86vm.so.1 => /usr/lib/libXxf86vm.so.1 (0x432a2000)
libdrm.so.2 => /usr/lib/libdrm.so.2 (0x4247b000)
libpthread.so.0 => /lib/libpthread.so.0 (0x41fe8000)
libdl.so.2 => /lib/libdl.so.2 (0x42005000)
libXi.so.6 => /usr/lib/libXi.so.6 (0x42748000)
/lib/ld-linux.so.2 (0x41e0f000)
libv4l2.so.0 => /usr/lib/libv4l2.so.0 (0x4217c000)
libxcb.so.1 => /usr/lib/libxcb.so.1 (0x42337000)
librt.so.1 => /lib/librt.so.1 (0x4200c000)
libv4lconvert.so.0 => /usr/lib/libv4lconvert.so.0 (0x42357000)
libXau.so.6 => /usr/lib/libXau.so.6 (0x421f3000)
libjpeg.so.62 => /usr/lib/libjpeg.so.62 (0x43317000)
If you want to run the example below you'll need readtex.c , readtex.h and girl.rgb, and compile it with: g++ yuvrect.cpp -o yuvrect -lGL -lGLU -lglut -lGLEW
#include <assert.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <GL/glew.h>
#include <GL/glut.h>
#include "readtex.c" /* I know, this is a hack. */
/* Image file loaded by Init() when no file name is given on the command line. */
#define TEXTURE_FILE "girl.rgb"
/* Rotation angles passed to glRotatef() in Display(); the arrow keys in
 * SpecialKey() change Xrot and Yrot, while Zrot is never modified. */
static GLfloat Xrot = 0, Yrot = 0, Zrot = 0;
/* Image dimensions, filled in by LoadYUVImage() in Init(). */
static GLint ImgWidth, ImgHeight;
/* Pixel data returned by LoadYUVImage(); Init() uploads it as
 * GL_YCBCR_MESA / GL_UNSIGNED_SHORT_8_8_MESA. */
static GLushort *ImageYUV = NULL;
static void DrawObject(void)
{
glBegin(GL_QUADS);
glTexCoord2f(0, 0);
glVertex2f(-1.0, -1.0);
glTexCoord2f(ImgWidth, 0);
glVertex2f(1.0, -1.0);
glTexCoord2f(ImgWidth, ImgHeight);
glVertex2f(1.0, 1.0);
glTexCoord2f(0, ImgHeight);
glVertex2f(-1.0, 1.0);
glEnd();
}
/*
 * GLUT display callback: clear the colour buffer, draw the quad rotated by
 * the current Xrot/Yrot/Zrot angles, then present the back buffer.
 */
static void Display( void )
{
   glClear( GL_COLOR_BUFFER_BIT );

   /* Keep the rotations local to this frame. */
   glPushMatrix();
   {
      glRotatef(Xrot, 1.0f, 0.0f, 0.0f);
      glRotatef(Yrot, 0.0f, 1.0f, 0.0f);
      glRotatef(Zrot, 0.0f, 0.0f, 1.0f);
      DrawObject();
   }
   /* On the failing driver the "unsupported texture format" message is
    * printed while this call executes. */
   glPopMatrix();

   glutSwapBuffers();
}
static void Reshape( int width, int height )
{
glViewport( 0, 0, width, height );
glMatrixMode( GL_PROJECTION );
glLoadIdentity();
glFrustum( -1.0, 1.0, -1.0, 1.0, 10.0, 100.0 );
glMatrixMode( GL_MODELVIEW );
glLoadIdentity();
glTranslatef( 0.0, 0.0, -15.0 );
}
/*
 * GLUT keyboard callback: Escape quits the program; any other key only
 * requests a redraw. The mouse position arguments are unused.
 */
static void Key( unsigned char key, int x, int y )
{
   enum { KEY_ESCAPE = 27 };   /* ASCII code of the Escape key */

   (void) x;
   (void) y;

   if (key == KEY_ESCAPE) {
      exit(0);
   }
   glutPostRedisplay();
}
static void SpecialKey( int key, int x, int y )
{
float step = 3.0;
(void) x;
(void) y;
switch (key) {
case GLUT_KEY_UP:
Xrot += step;
break;
case GLUT_KEY_DOWN:
Xrot -= step;
break;
case GLUT_KEY_LEFT:
Yrot += step;
break;
case GLUT_KEY_RIGHT:
Yrot -= step;
break;
}
glutPostRedisplay();
}
static void Init( int argc, char *argv[] )
{
GLuint texObj = 100;
const char *file;
printf("Checking GL_ARB_texture_rectangle\n");
if (!glutExtensionSupported("GL_ARB_texture_rectangle")) {
printf("Sorry, GL_NV_texture_rectangle is required\n");
exit(0);
}
printf("Checking GL_MESA_ycbcr_texture\n");
if (!glutExtensionSupported("GL_MESA_ycbcr_texture")) {
printf("Sorry, GL_MESA_ycbcr_texture is required\n");
exit(0);
}
glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
glBindTexture(GL_TEXTURE_RECTANGLE_ARB, texObj);
#ifdef LINEAR_FILTER
/* linear filtering looks much nicer but is much slower for Mesa */
glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
#else
glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
#endif
if (argc > 1)
file = argv[1];
else
file = TEXTURE_FILE;
ImageYUV = LoadYUVImage(file, &ImgWidth, &ImgHeight);
if (!ImageYUV) {
printf("Couldn't read %s\n", TEXTURE_FILE);
exit(0);
}
printf("Image: %dx%d\n", ImgWidth, ImgHeight);
printf("Calling glTexImage2D\n");
glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0,
GL_YCBCR_MESA, ImgWidth, ImgHeight, 0,
GL_YCBCR_MESA, GL_UNSIGNED_SHORT_8_8_MESA, ImageYUV);
printf("Called glTexImage2D\n");
assert(glGetError() == GL_NO_ERROR);
printf("* Assert #1\n");
glTexSubImage2D(GL_TEXTURE_RECTANGLE_ARB, 0,
0, 0, ImgWidth, ImgHeight,
GL_YCBCR_MESA, GL_UNSIGNED_SHORT_8_8_MESA, ImageYUV);
assert(glGetError() == GL_NO_ERROR);
printf("* Assert #2\n");
glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_MODULATE);
glEnable(GL_TEXTURE_RECTANGLE_ARB);
glShadeModel(GL_FLAT);
glClearColor(0.3, 0.3, 0.4, 1.0);
if (argc > 1 && strcmp(argv[1], "-info")==0) {
printf("GL_RENDERER = %s\n", (char *) glGetString(GL_RENDERER));
printf("GL_VERSION = %s\n", (char *) glGetString(GL_VERSION));
printf("GL_VENDOR = %s\n", (char *) glGetString(GL_VENDOR));
printf("GL_EXTENSIONS = %s\n", (char *) glGetString(GL_EXTENSIONS));
}
}
int main( int argc, char *argv[] )
{
glutInit( &argc, argv );
glutInitWindowSize( 300, 300 );
glutInitWindowPosition( 0, 0 );
glutInitDisplayMode( GLUT_RGB | GLUT_DOUBLE );
glutCreateWindow(argv[0] );
glewInit();
Init( argc, argv );
glutReshapeFunc( Reshape );
glutKeyboardFunc( Key );
glutSpecialFunc( SpecialKey );
glutDisplayFunc( Display );
glutMainLoop();
return 0;
}
Any tips to solve this issue, guys?
This can most likely be attributed to a driver bug or something similar, and I won't help you with that. However, it is not worth shying away from shaders: using them, you can forget about GL_MESA_ycbcr_texture and make your app more compatible.
We are going to use plain old GL_LUMINANCE_ALPHA format, so the image loads become:
/* Upload the same ImageYUV buffer as a two-channel luminance/alpha texture
 * of unsigned bytes instead of GL_YCBCR_MESA; the fragment shader that
 * follows reads chroma from the luminance channel (.x) and luma from the
 * alpha channel (.w). */
glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0,
GL_LUMINANCE_ALPHA, ImgWidth, ImgHeight, 0,
GL_LUMINANCE_ALPHA, GL_UNSIGNED_BYTE, ImageYUV);
glTexSubImage2D(GL_TEXTURE_RECTANGLE_ARB, 0,
0, 0, ImgWidth, ImgHeight,
GL_LUMINANCE_ALPHA, GL_UNSIGNED_BYTE, ImageYUV);
Then about the shaders:
/* Vertex shader: forwards texture coordinate set 0 to the fragment stage
 * through the varying `t` and applies the fixed-function transform. */
static const char *p_s_vertex_shader =
"varying vec2 t;"
"void main()"
"{"
" t = gl_MultiTexCoord0.xy;"
" gl_Position = ftransform();"
"}";
/* Fragment shader: for each fragment, reads Cb from the even texel and Cr
 * from the odd texel of the horizontal pair containing `t` (luminance
 * channel, re-centred by -0.5), reads luma from the alpha channel of the
 * texel at `t`, and combines them into an opaque RGB colour. */
static const char *p_s_fragment_shader =
"#extension GL_ARB_texture_rectangle : enable\n"
"varying vec2 t;"
"uniform sampler2DRect tex;"
"void main()"
"{"
" vec2 tcEven = vec2(floor(t.x * .5) * 2.0, t.y);"
" vec2 tcOdd = vec2(tcEven.x + 1.0, t.y);"
" float Cb = texture2DRect(tex, tcEven).x - .5;"
" float Cr = texture2DRect(tex, tcOdd).x - .5;"
" float y = texture2DRect(tex, t).w;" // redundant texture read optimized away by texture cache
" float r = y + 1.28033 * Cr;"
" float g = y - .21482 * Cb - .38059 * Cr;"
" float b = y + 2.12798 * Cb;"
" gl_FragColor = vec4(r, g, b, 1.0);"
"}";
int v = glCreateShader(GL_VERTEX_SHADER);
int f = glCreateShader(GL_FRAGMENT_SHADER);
int p = glCreateProgram();
glShaderSource(v, 1, &p_s_vertex_shader, 0);
glShaderSource(f, 1, &p_s_fragment_shader, 0);
glCompileShader(v);
//CheckShader(v);
glCompileShader(f);
//CheckShader(f);
glAttachShader(p, v);
glAttachShader(p, f);
glLinkProgram(p);
glUseProgram(p);
glUniform1i(glGetUniformLocation(p, "tex"), 0);
This comes after the texture setup, somewhere at the end of Init(). And that's it, works like a charm. For debugging, it's probably better to include CheckShader() as well (it reports compile errors in shaders):
bool CheckShader(int n_shader_object)
{
int n_tmp;
glGetShaderiv(n_shader_object, GL_COMPILE_STATUS, &n_tmp);
bool b_compiled = n_tmp == GL_TRUE;
int n_log_length;
glGetShaderiv(n_shader_object, GL_INFO_LOG_LENGTH, &n_log_length);
// query status ...
if(n_log_length > 1) {
char *p_s_temp_info_log;
if(!(p_s_temp_info_log = (char*)malloc(n_log_length)))
return false;
int n_tmp;
glGetShaderInfoLog(n_shader_object, n_log_length, &n_tmp,
p_s_temp_info_log);
assert(n_tmp <= n_log_length);
fprintf(stderr, "%s\n", p_s_temp_info_log);
free(p_s_temp_info_log);
}
// get/concat info-log
return b_compiled;
}
The shader version will probably be a little slower than the hardware-optimized version, but I'd say it's nothing to worry about.
If you decide to give the code a try, and have any problems with it, let me know ...
EDIT: There actually was a problem running this on ATI. It finally turned out that there was an error passing the varying variable "t"; in other words, this doesn't work:
/* Minimal shader pair reported NOT to work on ATI: the texture coordinate
 * is handed from the vertex to the fragment stage through the varying `t`. */
static const char *p_s_vertex_shader =
"varying vec2 t;"
"void main()"
"{"
" t = gl_MultiTexCoord0.xy;"
" gl_Position = ftransform();"
"}";
/* Samples the rectangle texture at the interpolated coordinate `t`. */
static const char *p_s_fragment_shader =
"#extension GL_ARB_texture_rectangle : enable\n"
"varying vec2 t;"
"uniform sampler2DRect tex;"
"void main()"
"{"
" gl_FragColor = texture2DRect(tex, t);"
"}";
And to easily fix this, one can just remove the vertex shader and let the fixed pipeline write texture coordinates. So this works on ATi as well:
/* Variant that relies on fixed-function vertex processing: there is no
 * vertex shader source, so the fragment shader takes its texture coordinate
 * from the built-in gl_TexCoord[0] instead of a user-defined varying. */
static const char *p_s_vertex_shader = NULL; // no vertex shader
static const char *p_s_fragment_shader =
"#extension GL_ARB_texture_rectangle : enable\n"
"uniform sampler2DRect tex;"
"void main()"
"{"
/* gl_TexCoord[0] is a vec4; GLSL has no implicit vec4 -> vec2 conversion,
 * so the .xy swizzle is required. */
" vec2 t = gl_TexCoord[0].xy;" // use fixed pipeline output
" gl_FragColor = texture2DRect(tex, t);"
"}";
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With