I'm developing a simple sprite-based 2D game in C++ that uses OpenGL for hardware-accelerated rendering, and SDL for window management and user input handling. Since it's a 2D game, I'm only ever going to need to draw quads, but because the number of sprites is dynamic, I can never rely on there being a constant number of quads. Consequently, I need to rebuffer all of the vertex data via my VBO each frame (since there may be more or fewer quads than there were in the last frame, and thus the buffer may be a different size).
The prototype program I have so far creates a window and allows the user to add and remove quads in a diagonal row by using the up and down arrow keys. Right now the quads I'm drawing are simple, untextured white squares. Here is the code I'm working with (compiles and works correctly under OS X 10.6.8 and Ubuntu 12.04 with OpenGL 2.1):
#if defined(__APPLE__)
#include <OpenGL/OpenGL.h>
#endif
#if defined(__linux__)
#define GL_GLEXT_PROTOTYPES
#include <GL/glx.h>
#endif
#include <GL/gl.h>
#include <SDL.h>
#include <iostream>
#include <vector>
#include <string>
struct Vertex
{
//vertex coordinates
GLint x;
GLint y;
};
//Constants
const int SCREEN_WIDTH = 1024;
const int SCREEN_HEIGHT = 768;
const int FPS = 60; //our framerate
//Globals
SDL_Surface *screen; //the screen
std::vector<Vertex> vertices; //the actual vertices for the quads
std::vector<GLint> startingElements; //the index where the 4 vertices of each quad begin in the 'vertices' vector
std::vector<GLint> counts; //the number of vertices for each quad
GLuint VBO = 0; //the handle to the vertex buffer
void createVertex(GLint x, GLint y)
{
Vertex vertex;
vertex.x = x;
vertex.y = y;
vertices.push_back(vertex);
}
//creates a quad at position x,y, with a width of w and a height of h (in pixels)
void createQuad(GLint x, GLint y, GLint w, GLint h)
{
//Since we're drawing the quads using GL_TRIANGLE_STRIP, the vertex drawing
//order is from top to bottom, left to right, like so:
//
// 1-----3
// | |
// | |
// 2-----4
createVertex(x, y); //top-left vertex
createVertex(x, y+h); //bottom-left vertex
createVertex(x+w, y); //top-right vertex
createVertex(x+w, y+h); //bottom-right vertex
counts.push_back(4); //each quad will always have exactly 4 vertices
startingElements.push_back(startingElements.size()*4);
std::cout << "Number of Quads: " << counts.size() << std::endl; //print out the current number of quads
}
//removes the most recently created quad
void removeQuad()
{
if (counts.size() > 0) //we don't want to remove a quad if there aren't any to remove
{
for (int i=0; i<4; i++)
{
vertices.pop_back();
}
startingElements.pop_back();
counts.pop_back();
std::cout << "Number of Quads: " << counts.size() << std::endl;
}
else
{
std::cout << "Sorry, you can't remove a quad if there are no quads to remove!" << std::endl;
}
}
void init()
{
//initialize SDL
SDL_Init(SDL_INIT_VIDEO | SDL_INIT_TIMER);
screen = SDL_SetVideoMode(SCREEN_WIDTH, SCREEN_HEIGHT, 0, SDL_OPENGL);
#if defined(__APPLE__)
//Enable vsync so that we don't get tearing when rendering
GLint swapInterval = 1;
CGLSetParameter(CGLGetCurrentContext(), kCGLCPSwapInterval, &swapInterval);
#endif
//Disable depth testing, lighting, and dithering, since we're going to be doing 2D rendering only
glDisable(GL_DEPTH_TEST);
glDisable(GL_LIGHTING);
glDisable(GL_DITHER);
glPushAttrib(GL_DEPTH_BUFFER_BIT | GL_LIGHTING_BIT);
//Set the projection matrix
glMatrixMode(GL_PROJECTION);
glLoadIdentity();
glOrtho(0, SCREEN_WIDTH, SCREEN_HEIGHT, 0, -1.0, 1.0);
//Set the modelview matrix
glMatrixMode(GL_MODELVIEW);
glLoadIdentity();
//Create VBO
glGenBuffers(1, &VBO);
glBindBuffer(GL_ARRAY_BUFFER, VBO);
}
void gameLoop()
{
int frameDuration = 1000/FPS; //the set duration (in milliseconds) of a single frame
int currentTicks;
int pastTicks = SDL_GetTicks();
bool done = false;
SDL_Event event;
while(!done)
{
//handle user input
while(SDL_PollEvent(&event))
{
switch(event.type)
{
case SDL_KEYDOWN:
switch (event.key.keysym.sym)
{
case SDLK_UP: //create a new quad every time the up arrow key is pressed
createQuad(64*counts.size(), 64*counts.size(), 64, 64);
break;
case SDLK_DOWN: //remove the most recently created quad every time the down arrow key is pressed
removeQuad();
break;
default:
break;
}
break;
case SDL_QUIT:
done = true;
break;
default:
break;
}
}
//Clear the color buffer
glClear(GL_COLOR_BUFFER_BIT);
glBindBuffer(GL_ARRAY_BUFFER, VBO);
//replace the current contents of the VBO with a completely new set of data (possibly including either more or fewer quads)
glBufferData(GL_ARRAY_BUFFER, vertices.size()*sizeof(Vertex), &vertices.front(), GL_DYNAMIC_DRAW);
glEnableClientState(GL_VERTEX_ARRAY);
//Set vertex data
glVertexPointer(2, GL_INT, sizeof(Vertex), 0);
//Draw the quads
glMultiDrawArrays(GL_TRIANGLE_STRIP, &startingElements.front(), &counts.front(), counts.size());
glDisableClientState(GL_VERTEX_ARRAY);
glBindBuffer(GL_ARRAY_BUFFER, 0);
//Check to see if we need to delay the duration of the current frame to match the set framerate
currentTicks = SDL_GetTicks();
int currentDuration = (currentTicks - pastTicks); //the duration of the frame so far
if (currentDuration < frameDuration)
{
SDL_Delay(frameDuration - currentDuration);
}
pastTicks = SDL_GetTicks();
// flip the buffers
SDL_GL_SwapBuffers();
}
}
void cleanUp()
{
glDeleteBuffers(1, &VBO);
SDL_FreeSurface(screen);
SDL_Quit();
}
int main(int argc, char *argv[])
{
std::cout << "To create a quad, press the up arrow. To remove the most recently created quad, press the down arrow." << std::endl;
init();
gameLoop();
cleanUp();
return 0;
}
At the moment I'm using GL_TRIANGLE_STRIPS with glMultiDrawArrays() to render my quads. This works, and seems do be pretty decent in terms of performance, but I have to wonder whether using GL_TRIANGLES in conjunction with an IBO to avoid duplicate vertices would be a more efficient way to render? I've done some research, and some people suggest that indexed GL_TRIANGLES generally outperform GL_TRIANGLE_STRIPS, but they also seem to assume that the number of quads would remain constant, and thus the size of the VBO and IBO would not have to be rebuffered each frame. That's my biggest hesitation with indexed GL_TRIANGLES: if I did implement indexed GL_TRIANGLES, I would have to rebuffer the entire index buffer each frame in addition to rebuffering the entire VBO each frame, again because of the dynamic number of quads.
So basically, my question is this: Given that I have to rebuffer all of my vertex data to the GPU each frame due to the dynamic number of quads, would it be more efficient to switch to indexed GL_TRIANGLES to draw the quads, or should I stick with my current GL_TRIANGLE_STRIP implementation?
You'll probably be fine using un-indexed GL_QUADS
/GL_TRIANGLES
and a glDrawArrays()
call.
SDL_Surface *screen;
...
screen = SDL_SetVideoMode(SCREEN_WIDTH, SCREEN_HEIGHT, 0, SDL_OPENGL);
...
SDL_FreeSurface(screen);
Don't do that:
The returned surface is freed by
SDL_Quit
and must not be freed by the caller. This rule also includes consecutive calls toSDL_SetVideoMode
(i.e. resize or resolution change) because the existing surface will be released automatically.
EDIT: Simple vertex array demo:
// g++ main.cpp -lglut -lGL
#include <GL/glut.h>
#include <vector>
using namespace std;
// OpenGL Mathematics (GLM): http://glm.g-truc.net/
#include <glm/glm.hpp>
#include <glm/gtc/random.hpp>
using namespace glm;
struct SpriteWrangler
{
SpriteWrangler( unsigned int aSpriteCount )
{
verts.resize( aSpriteCount * 6 );
states.resize( aSpriteCount );
for( size_t i = 0; i < states.size(); ++i )
{
states[i].pos = linearRand( vec2( -400, -400 ), vec2( 400, 400 ) );
states[i].vel = linearRand( vec2( -30, -30 ), vec2( 30, 30 ) );
Vertex vert;
vert.r = (unsigned char)linearRand( 64.0f, 255.0f );
vert.g = (unsigned char)linearRand( 64.0f, 255.0f );
vert.b = (unsigned char)linearRand( 64.0f, 255.0f );
vert.a = 255;
verts[i*6 + 0] = verts[i*6 + 1] = verts[i*6 + 2] =
verts[i*6 + 3] = verts[i*6 + 4] = verts[i*6 + 5] = vert;
}
}
void wrap( const float minVal, float& val, const float maxVal )
{
if( val < minVal )
val = maxVal - fmod( maxVal - val, maxVal - minVal );
else
val = minVal + fmod( val - minVal, maxVal - minVal );
}
void Update( float dt )
{
for( size_t i = 0; i < states.size(); ++i )
{
states[i].pos += states[i].vel * dt;
wrap( -400.0f, states[i].pos.x, 400.0f );
wrap( -400.0f, states[i].pos.y, 400.0f );
float size = 20.0f;
verts[i*6 + 0].pos = states[i].pos + vec2( -size, -size );
verts[i*6 + 1].pos = states[i].pos + vec2( size, -size );
verts[i*6 + 2].pos = states[i].pos + vec2( size, size );
verts[i*6 + 3].pos = states[i].pos + vec2( size, size );
verts[i*6 + 4].pos = states[i].pos + vec2( -size, size );
verts[i*6 + 5].pos = states[i].pos + vec2( -size, -size );
}
}
struct Vertex
{
vec2 pos;
unsigned char r, g, b, a;
};
struct State
{
vec2 pos;
vec2 vel; // units per second
};
vector< Vertex > verts;
vector< State > states;
};
void display()
{
// timekeeping
static int prvTime = glutGet(GLUT_ELAPSED_TIME);
const int curTime = glutGet(GLUT_ELAPSED_TIME);
const float dt = ( curTime - prvTime ) / 1000.0f;
prvTime = curTime;
// sprite updates
static SpriteWrangler wrangler( 2000 );
wrangler.Update( dt );
vector< SpriteWrangler::Vertex >& verts = wrangler.verts;
glClear( GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT );
// set up projection and camera
glMatrixMode(GL_PROJECTION);
glLoadIdentity();
double w = glutGet( GLUT_WINDOW_WIDTH );
double h = glutGet( GLUT_WINDOW_HEIGHT );
double ar = w / h;
glOrtho( -400 * ar, 400 * ar, -400, 400, -1, 1);
glMatrixMode(GL_MODELVIEW);
glLoadIdentity();
glEnableClientState( GL_VERTEX_ARRAY );
glEnableClientState( GL_COLOR_ARRAY );
glVertexPointer( 2, GL_FLOAT, sizeof( SpriteWrangler::Vertex ), &verts[0].pos.x );
glColorPointer( 4, GL_UNSIGNED_BYTE, sizeof( SpriteWrangler::Vertex ), &verts[0].r );
glDrawArrays( GL_TRIANGLES, 0, verts.size() );
glDisableClientState( GL_VERTEX_ARRAY );
glDisableClientState( GL_COLOR_ARRAY );
glutSwapBuffers();
}
// run display() every 16ms or so
void timer( int extra )
{
glutTimerFunc( 16, timer, 0 );
glutPostRedisplay();
}
int main(int argc, char **argv)
{
glutInit( &argc, argv );
glutInitWindowSize( 600, 600 );
glutInitDisplayMode( GLUT_RGBA | GLUT_DEPTH | GLUT_DOUBLE );
glutCreateWindow( "Sprites" );
glutDisplayFunc( display );
glutTimerFunc( 0, timer, 0 );
glutMainLoop();
return 0;
}
You can get decent performance with just vertex arrays.
Ideally most/all of your dt
s should be <= 16 milliseconds.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With