I've run into quite an odd performance issue. So far I've reduced the problem to this: I'm rendering 20x20x20 cubes in a grid using glDrawElementsInstanced,
which works fine as long as my camera is far away from the origin; however, when it gets closer to the origin, rendering starts grinding to a halt.
I'm defining my model view projection matrix through:
// Camera setup: the eye sits on the negative z axis at -distance and looks toward (0,0,10).
float distance=3.8;
// Perspective projection: aspect ratio from the window size, near plane 0.1, far plane 300.
Projection = glm::perspective(65.0f, (float)(width)/height, 0.1f, 300.0f);
View = glm::lookAt( glm::vec3(0,0,-distance), // eye position
glm::vec3(0,0,10), // point the camera looks at
glm::vec3(0,1,0)); // up direction
// Rotation by an angle of 0 about the given axis, i.e. the model is left unrotated.
Model = glm::rotate(glm::mat4(1.0f), 0.0f, glm::vec3(0.25f, 1.0f,0.75f));
With distance at 40, there are no problems, but when distance decreases to about 3.8 or lower, everything grinds to a halt.
The actual call to rendering is carried out through:
// Instanced path: bind the cube VAO and issue a single instanced draw call.
// The final argument of glDrawElementsInstanced is the number of instances to draw.
glBindVertexArray(cubeVAO);
glDrawElementsInstanced(GL_TRIANGLES, indices.size(),GL_UNSIGNED_INT,(GLvoid*)(0),latticePoints.size());
While putting all the vertices in a single buffer and rendering by calling:
// Non-instanced path: all vertices live in one buffer and are drawn with a single glDrawArrays call.
glBindVertexArray(nonInstancedVAO);
glDrawArrays(GL_TRIANGLES, 0,vertices.size() );
Completely removes the behavior. Has anyone experienced similar behavior and can point me in the direction of a solution? Failing that, does anyone have an idea of how to track down something like this? I hoped I would be able to determine what was causing the slowdown using gDEBugger; however, that just reconfirms that there aren't any other OpenGL calls, and doesn't really help in figuring out what's taking up all the processing time.
Another note is that glDrawArraysInstanced also shows the same slowdown, and that splitting the call into 4 separate calls with a quarter of the geometry each also stops the slowdown.
Update
Here's an attempt at a minimal illustration of the problem.
//Minimal reproduction of problem
#include <stdio.h>
#include <string>
#include <fstream>
#include <stdlib.h>
#include <string.h>
#include <GL/glew.h>
#include <GLFW/glfw3.h>
// Include GLM
#include <glm/glm.hpp>
#include <glm/gtc/matrix_transform.hpp>
#include <glm/gtc/type_ptr.hpp>
#include <vector>
#include <iostream>
#include <stdio.h>
// Set to true to use instanced rendering (glDrawElementsInstanced),
// or to false to render a pre-generated grid instead (glDrawElements).
#define Instanced true
// Translation of the grid away from the origin. The problem is present at a
// distance of 0, but disappears at e.g. 40.
const float distanceFromOrigin=0;
// Loads the vertex and fragment shader sources from the two given file paths
// and returns the resulting program object (0 if the vertex shader file cannot be opened).
GLuint LoadShaders(const char * vertex_file_path,const char * fragment_file_path);
int main(){
int width, height;
bool running = true;
// Initialise GLFW
glfwInit();
glfwWindowHint(GLFW_SAMPLES,1);
glfwWindowHint(GLFW_OPENGL_DEBUG_CONTEXT,GL_TRUE);
glfwWindowHint(GLFW_VERSION_MAJOR, 4);
GLFWwindow* windowRef = glfwCreateWindow( 512, 512, "",0,0);
glfwMakeContextCurrent(windowRef);
glewInit();
//Load Shader
GLuint programID = LoadShaders( "Simple.vs.c", "Simple.fs.c" );
GLuint MatrixID = glGetUniformLocation(programID, "MVP");
glUseProgram(programID);
glm::mat4 Model,Projection,MVP,View,checkMVP;
std::vector<GLuint> sqIndice = {3,2,1,1,0,3,4,5,6,6,7,4,0,4,7,7,3,0,0,1,5,5,4,0,2,3,7,7,6,2,6,5,1,1,2,6,0,4,7,7,3,0};
std::vector<GLfloat> sqVertex = {-1, 1, -1, -1, 1, 1, -1, -1, 1, -1, -1, -1, 1, 1, -1, 1, 1, 1, 1, -1, 1, 1, -1, -1};
std::vector<GLfloat> sqColor = {0.2472,0.24,0.6,0.6,0.24,0.442893,0.6,0.547014,0.24,0.24,0.6,0.33692,0.24,0.353173,0.6,0.6,0.24,0.563266,0.6,0.426641,0.24,0.263452,0.6,0.24};
const float lattice = 5;
const int mxn = 10;
std::vector<GLfloat> v1 = {lattice,-1,0};
std::vector<GLfloat> v2 = {1,lattice,0};
std::vector<GLfloat> v3 = {0,0,lattice};
std::vector<GLfloat> offset = {0,0,-distanceFromOrigin};
std::vector<GLfloat> latticePoints,sqVertexGrid,sqColorGrid;// = {0,0,0};
std::vector<GLuint> sqIndiceGrid;
// Looping stuff to generate the full grid of "instances" to render in a single call.
int instanceCount=0;
//Generate Lattice vectors, aswell as a vector containing the full grids of indices,vertexes and colors
for(int x=-mxn;x<mxn;++x){
for(int y=-mxn;y<mxn;++y){
for(int z=-mxn;z<mxn;++z){
for(int n=0;n<3;++n){
latticePoints.push_back( x*v1[n]+y*v2[n]+z*v3[n]+offset[n] );
};
for(int elm=0;elm<sqVertex.size();elm+=3){
for(int n=0;n<3;++n){
sqVertexGrid.push_back(sqVertex[elm+n]+x*v1[n]+y*v2[n]+z*v3[n]+offset[n]);
sqColorGrid.push_back(sqColor[elm+n]);
};
};
for(int elm=0;elm<sqIndice.size();++elm){
sqIndiceGrid.push_back(sqIndice[elm]+instanceCount*sqVertex.size()/3);
};
++instanceCount;glewInit
};
};
};
#if Instanced==true
//Initialize and fill vertex,color and indice buffers with the relevant data.
GLuint cubeVAO;
glGenVertexArrays(1, &cubeVAO);
glBindVertexArray(cubeVAO);
glEnable(GL_DEPTH_TEST);
//Vertex buffer
GLuint vertexBuffer;
glGenBuffers(1, &vertexBuffer);
glBindBuffer(GL_ARRAY_BUFFER, vertexBuffer);
glBufferData(GL_ARRAY_BUFFER, sqVertex.size()*sizeof(GLfloat), &sqVertex[0], GL_STATIC_DRAW);
glEnableVertexAttribArray(0);
glVertexAttribPointer(0,3,GL_FLOAT,GL_FALSE,0,(void*)0);
//Color buffer
GLuint colorBuffer;
glGenBuffers(1, &colorBuffer);
glBindBuffer(GL_ARRAY_BUFFER, colorBuffer);
glBufferData(GL_ARRAY_BUFFER, sqColor.size()*sizeof(GLfloat), &sqColor[0], GL_STATIC_DRAW);
glEnableVertexAttribArray(1);
glVertexAttribPointer(1,3,GL_FLOAT,GL_FALSE,0,(void*)0);
// Indice buffer
GLuint indicesBuffer;
glGenBuffers(1, &indicesBuffer);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, indicesBuffer);
glBufferData(GL_ELEMENT_ARRAY_BUFFER, sqIndice.size()*sizeof(GLuint), &sqIndice[0], GL_STATIC_DRAW);
//Lattice point buffer
GLuint latticePointBuffer;
glGenBuffers(1, &latticePointBuffer);
glBindBuffer(GL_ARRAY_BUFFER, latticePointBuffer);
glBufferData(GL_ARRAY_BUFFER, latticePoints.size()*sizeof(GLfloat), &latticePoints[0], GL_STATIC_DRAW);
glEnableVertexAttribArray(2);
glVertexAttribPointer(2,3,GL_FLOAT,GL_FALSE,0,(void*)0);
glVertexAttribDivisor(2,1);
glBindVertexArray(0);
#elif Instanced==false
GLuint cubeGridVAO;
glGenVertexArrays(1, &cubeGridVAO);
glBindVertexArray(cubeGridVAO);
glEnable(GL_DEPTH_TEST);
//Vertex buffer
GLuint vertexBuffer;
glGenBuffers(1, &vertexBuffer);
glBindBuffer(GL_ARRAY_BUFFER, vertexBuffer);
glBufferData(GL_ARRAY_BUFFER, sqVertexGrid.size()*sizeof(GLfloat), &sqVertexGrid[0], GL_STATIC_DRAW);
glEnableVertexAttribArray(0);
glVertexAttribPointer(0,3,GL_FLOAT,GL_FALSE,0,(void*)0);
//Color buffer
GLuint colorBuffer;
glGenBuffers(1, &colorBuffer);
glBindBuffer(GL_ARRAY_BUFFER, colorBuffer);
glBufferData(GL_ARRAY_BUFFER, sqColorGrid.size()*sizeof(GLfloat), &sqColorGrid[0], GL_STATIC_DRAW);
glEnableVertexAttribArray(1);
glVertexAttribPointer(1,3,GL_FLOAT,GL_FALSE,0,(void*)0);
// Indice buffer
GLuint indicesBuffer;
glGenBuffers(1, &indicesBuffer);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, indicesBuffer);
glBufferData(GL_ELEMENT_ARRAY_BUFFER, sqIndiceGrid.size()*sizeof(GLuint), &sqIndiceGrid[0], GL_STATIC_DRAW);
glBindVertexArray(0);
#endif
while(running)
{
glfwGetFramebufferSize(windowRef, &width, &height);
height = height > 0 ? height : 1;
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
Projection = glm::perspective(65.0f, (float)(width)/height, 0.1f, 300.0f);
View = glm::lookAt( glm::vec3(0.0f,0.0f,-(distanceFromOrigin+3.8f)),
glm::vec3(0.0f,0.0f,100.0f),
glm::vec3(0.0f,1.0f,0.0f));
Model = glm::rotate(glm::mat4(1.0f), 0.0f, glm::vec3(0.25f, 1.0f,0.75f));
MVP = Projection*View*Model;
glUniformMatrix4fv(MatrixID, 1, GL_FALSE, glm::value_ptr(MVP));
#if Instanced==true
glBindVertexArray(cubeVAO);
glDrawElementsInstanced(GL_TRIANGLES, sqIndice.size(),GL_UNSIGNED_INT,(GLvoid*)(0),latticePoints.size());
#elif Instanced==false
glBindVertexArray(cubeGridVAO);
glDrawElements(GL_TRIANGLES, sqIndiceGrid.size(),GL_UNSIGNED_INT,(GLvoid*)(0));
#endif
glfwPollEvents();
glfwSwapBuffers(windowRef);
std::cout<<".\n";
running = !glfwGetKey(windowRef,GLFW_KEY_ESCAPE) && !glfwWindowShouldClose(windowRef);
}
glfwDestroyWindow(windowRef);
glfwTerminate();
return 0;
};
// Reads the text file at `path` and appends it to `out`, line by line.
// Prints a message and returns false if the file cannot be opened.
static bool ReadShaderSource(const char * path, std::string & out){
    std::ifstream stream(path, std::ios::in);
    if(!stream.is_open()){
        printf("Impossible to open %s. Are you in the right directory?\n", path);
        return false;
    }
    std::string line;
    while(std::getline(stream, line)){
        // Newline goes AFTER each line so that line numbers reported by the
        // shader compiler match the line numbers in the file.
        out += line;
        out += '\n';
    }
    return true;
}

// Compiles `code` as a shader of the given `type`, printing the info log if
// there is one. Returns the shader object, or 0 if compilation failed (the
// failed shader object is deleted). `path` is only used for the log message.
static GLuint CompileShader(GLenum type, const char * path, const std::string & code){
    printf("Compiling shader : %s\n", path);
    const GLuint shaderID = glCreateShader(type);
    char const * sourcePointer = code.c_str();
    glShaderSource(shaderID, 1, &sourcePointer , NULL);
    glCompileShader(shaderID);

    GLint status = GL_FALSE;
    GLint infoLogLength = 0;
    glGetShaderiv(shaderID, GL_COMPILE_STATUS, &status);
    glGetShaderiv(shaderID, GL_INFO_LOG_LENGTH, &infoLogLength);
    // The reported length includes the terminating NUL, so 1 means "empty log".
    if ( infoLogLength > 1 ){
        std::vector<char> message(infoLogLength+1);
        glGetShaderInfoLog(shaderID, infoLogLength, NULL, &message[0]);
        printf("%s\n", &message[0]);
    }
    if ( status != GL_TRUE ){
        printf("Failed to compile %s\n", path);
        glDeleteShader(shaderID);
        return 0;
    }
    return shaderID;
}

// Builds a GLSL program from a vertex and a fragment shader source file.
//   vertex_file_path   - path of the vertex shader source
//   fragment_file_path - path of the fragment shader source
// Returns the linked program object, or 0 if either file cannot be opened,
// either shader fails to compile, or the program fails to link. Diagnostics
// are printed to stdout. Requires a current OpenGL context.
GLuint LoadShaders(const char * vertex_file_path,const char * fragment_file_path){
    // Read both sources before creating any GL object, so that a missing file
    // cannot leak shader objects.
    std::string VertexShaderCode;
    if(!ReadShaderSource(vertex_file_path, VertexShaderCode)){
        return 0;
    }
    std::string FragmentShaderCode;
    if(!ReadShaderSource(fragment_file_path, FragmentShaderCode)){
        return 0;
    }

    // Compile both stages before bailing out so that both logs get printed.
    const GLuint VertexShaderID = CompileShader(GL_VERTEX_SHADER, vertex_file_path, VertexShaderCode);
    const GLuint FragmentShaderID = CompileShader(GL_FRAGMENT_SHADER, fragment_file_path, FragmentShaderCode);
    if(VertexShaderID==0 || FragmentShaderID==0){
        // glDeleteShader silently ignores the value 0.
        glDeleteShader(VertexShaderID);
        glDeleteShader(FragmentShaderID);
        return 0;
    }

    // Link the program
    printf("Linking program\n");
    const GLuint ProgramID = glCreateProgram();
    glAttachShader(ProgramID, VertexShaderID);
    glAttachShader(ProgramID, FragmentShaderID);
    glLinkProgram(ProgramID);

    // Check the program
    GLint Result = GL_FALSE;
    GLint InfoLogLength = 0;
    glGetProgramiv(ProgramID, GL_LINK_STATUS, &Result);
    glGetProgramiv(ProgramID, GL_INFO_LOG_LENGTH, &InfoLogLength);
    if ( InfoLogLength > 1 ){
        std::vector<char> ProgramErrorMessage(InfoLogLength+1);
        glGetProgramInfoLog(ProgramID, InfoLogLength, NULL, &ProgramErrorMessage[0]);
        printf("%s\n", &ProgramErrorMessage[0]);
    }

    // The shader objects are no longer needed once linking has been attempted;
    // detach them so that glDeleteShader can actually free them.
    glDetachShader(ProgramID, VertexShaderID);
    glDetachShader(ProgramID, FragmentShaderID);
    glDeleteShader(VertexShaderID);
    glDeleteShader(FragmentShaderID);

    if ( Result != GL_TRUE ){
        printf("Failed to link program\n");
        glDeleteProgram(ProgramID);
        return 0;
    }
    return ProgramID;
}
Ok, take a deep breath and take a seat: your problem is graphic card memory speed.
But you could make it easier for the GPU by fixing this bug:
glDrawElementsInstanced(GL_TRIANGLES, sqIndice.size(),GL_UNSIGNED_INT,(GLvoid*)(0),latticePoints.size());
glDrawElementsInstanced
expects the number of instances to draw as its last parameter, but you pass the number of elements in latticePoints,
which is 3 times the number of instances. For the surplus instances the shader reads a zero lattice point (because out-of-bounds access is prevented), so 16000 cubes are not translated and are all painted at the same position. This results in painting the front face of the cubes 16000 times. The depth buffer doesn't prevent this because the faces do not conceal each other; they are on the same spot.
So when your distanceFromOrigin
decreases the 16000 center cubes get bigger and bigger. OpenGL has to draw more and more pixels. A lot more, to be exact. It has to draw so much that it hits the speed limit of the graphic card memory.
Read Diagnose OpenGl Performance Problems for the whole story.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With