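First, why write sizeof(blockPerBatch) inside every glBufferData(…sizeof(blockPerBatch)…) call? I would store it once, GLuint sizeof_blockPerBatch = sizeof(blockPerBatch), and then call glBufferData(…sizeof_blockPerBatch…). (Note that sizeof of an array is evaluated at compile time, so this is a readability change rather than a speed-up.)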
Second, do you get a speed/FPS improvement if you use layout(std140) in your shader, like this:
// Per-batch uniform block. With layout(std140) the member offsets are fixed by
// the GL specification, so the host-side buffer must follow the byte offsets
// noted on each member below.
layout(std140) uniform BlockPerBatch
{
mat4 matLocal;       // byte offset   0 (four vec4 columns, 64 bytes)
mat4 matMVP;         // byte offset  64 (four vec4 columns, 64 bytes)
vec2 uvBase;         // byte offset 128 (vec2 is 8-byte aligned)
vec2 perlinMovement; // byte offset 136 (packed directly after uvBase)
vec3 localEye;       // byte offset 144 (vec3 is 16-byte aligned, 12 bytes used)
};
Code with a UBO:
GLuint uniformBlock_blockPerBatch_id;

// CPU-side mirror of the GLSL block `layout(std140) uniform BlockPerBatch`.
// The float indices below follow the std140 rules: a mat4 is four 16-byte
// columns, a vec2 is 8-byte aligned (so two consecutive vec2s pack together
// with no filler), and a vec3 is 16-byte aligned.
//   float index  0..15 : mat4 matLocal        (byte offset   0)
//   float index 16..31 : mat4 matMVP          (byte offset  64)
//   float index 32..33 : vec2 uvBase          (byte offset 128)
//   float index 34..35 : vec2 perlinMovement  (byte offset 136)
//   float index 36..38 : vec3 localEye        (byte offset 144)
//   float index 39     : trailing filler to a 16-byte multiple
GLfloat blockPerBatch[] =
{
    1.0f, 0.0f, 0.0f, 0.0f, // mat4 matLocal
    0.0f, 1.0f, 0.0f, 0.0f,
    0.0f, 0.0f, 1.0f, 0.0f,
    0.0f, 0.0f, 0.0f, 1.0f,
    1.0f, 0.0f, 0.0f, 0.0f, // mat4 matMVP
    0.0f, 1.0f, 0.0f, 0.0f,
    0.0f, 0.0f, 1.0f, 0.0f,
    0.0f, 0.0f, 0.0f, 1.0f,
    0.0f, 0.0f,             // vec2 uvBase
    0.0f, 0.0f,             // vec2 perlinMovement (no filler between the two vec2s)
    0.0f, 0.0f, 0.0f,       // vec3 localEye
    1.0f,                   // filler
};
// sizeof on an array is a compile-time constant; the named variable is kept
// purely so the upload call below reads clearly.
GLuint sizeof_blockPerBatch = sizeof(blockPerBatch);

// Convenience views into blockPerBatch so members can be assigned by name.
// NOTE(review): assumes mat4/vec2/vec3 are tightly packed float types
// (e.g. GLM-style) with column-major mat4 -- confirm against the math library.
mat4 &matLocal       = (mat4&)blockPerBatch[0];
mat4 &matMVP         = (mat4&)blockPerBatch[16];
vec2 &uvBase         = (vec2&)blockPerBatch[32];
vec2 &perlinMovement = (vec2&)blockPerBatch[34];
vec3 &localEye       = (vec3&)blockPerBatch[36];

defineUniformBlockObject(0,"BlockPerBatch",uniformBlock_blockPerBatch_id); // once for all batches

// Per batch: write the new values through the views...
matLocal = ...;
matMVP = ...;
uvBase = ...;
perlinMovement = ...;
localEye = ...;

// ...then upload the whole block in a single call.
glBindBuffer(GL_UNIFORM_BUFFER, uniformBlock_blockPerBatch_id);
glBufferData(GL_UNIFORM_BUFFER, sizeof_blockPerBatch, blockPerBatch, GL_DYNAMIC_DRAW);
where the helper function defineUniformBlockObject is:
// Creates a uniform buffer object for the named uniform block of the program
// `shader_id` and wires both the block and the buffer to `binding_point`.
//
//   binding_point     - uniform buffer binding index shared by block and buffer
//   GLSL_block_string - name of the uniform block as declared in the shader
//   uniformBlock_id   - receives the generated buffer name
//
// If the program has no active block with that name, only the buffer name is
// generated: no storage is allocated and nothing is bound.
void defineUniformBlockObject(GLuint binding_point, const char *GLSL_block_string, GLuint &uniformBlock_id)
{
    glGenBuffers(1, &uniformBlock_id);

    // Look up "layout(std140) uniform <GLSL_block_string>" in the program.
    const GLuint uniformBlockIndex = glGetUniformBlockIndex(shader_id, GLSL_block_string);
    if (uniformBlockIndex == GL_INVALID_INDEX)
    {
        // Unknown block name: the calls below would only raise GL errors and
        // leave the queried size unset, so stop here.
        return;
    }

    // Associate the uniform block with the binding point.
    glUniformBlockBinding(shader_id, uniformBlockIndex, binding_point);

    // Ask the implementation how many bytes are needed to back the block.
    // Initialised to 0 so a failed query can never yield a garbage size.
    GLint uniformBlockSize = 0;
    glGetActiveUniformBlockiv(shader_id, uniformBlockIndex,
                              GL_UNIFORM_BLOCK_DATA_SIZE,
                              &uniformBlockSize);

    // Create the UBO storage (contents are supplied later by the caller).
    glBindBuffer(GL_UNIFORM_BUFFER, uniformBlock_id);
    glBufferData(GL_UNIFORM_BUFFER, uniformBlockSize, NULL, GL_DYNAMIC_DRAW);

    // Attach the now-allocated buffer to the binding point the block reads from.
    glBindBufferBase(GL_UNIFORM_BUFFER, binding_point, uniformBlock_id);
}
I see a speed improvement using this over a bunch of separate glUniform* calls, but I haven't tested it extensively. I would be curious whether using "layout(std140) uniform" has any effect in your case.