Last week I had a frustrating problem with depth testing. I had a program which worked perfectly on an NVIDIA card (driver version 364.72), but not on an AMD card (driver version 15.201.1001.1005). It turned out that the problem was caused by the arrays inside a GLSL uniform block. I think it is due to a bug in the AMD driver, or maybe my original code didn’t obey the OpenGL specification and AMD couldn’t swallow it. Anyway, the bug was hard to find, because neither glGetError nor the glGet*InfoLog functions reported any errors, and the data inside the uniform block seemed to be valid. But it only seemed so, because the data inside the uniform block arrays was just gibberish on the AMD card. To help someone else avoid this bug, I’ll show below which way works and which doesn’t. Let’s start with the wrong one:
NOTE: CODE BELOW DOESN’T WORK ON AMD CARD, BUT IT WILL WORK ON NVIDIA
In a shader program we have uniform block
// Uniform block named TILES: four ints followed by two float arrays.
// It is declared with the `shared` layout, where member offsets and array
// strides are chosen by the implementation and have to be queried by the
// application (GL_UNIFORM_OFFSET / GL_UNIFORM_ARRAY_STRIDE).
// MAKSIMIRIVI and BLOCK_SIZE are defined outside this snippet.
layout(shared) uniform TILES {
int tiiliaX;
int tiiliaY;
int fmodX;
int fmodY;
float horizontalLevel[4*(MAKSIMIRIVI/BLOCK_SIZE+2)]; //this is an array which won't work on AMD
float verticalLevel[4*(MAKSIMIRIVI/BLOCK_SIZE+2)]; //this is an array which won't work on AMD
};
and its counterpart on the CPU side is
// CPU-side copy of the values that go into the TILES uniform block,
// together with the bookkeeping needed to upload them.
struct TILES {
int tiiliaX;
int tiiliaY;
int fmodX;
int fmodY;
// Tightly packed floats; same element count as the arrays in the GLSL block.
float horizontalLevel[4*(MAKSIMIRIVI/BLOCK_SIZE+2)];
float verticalLevel[4*(MAKSIMIRIVI/BLOCK_SIZE+2)];
// Byte offsets of the six block members, filled in with GL_UNIFORM_OFFSET.
GLint locations[6];
// Uniform buffer binding point passed to glUniformBlockBinding and glBindBufferBase.
GLuint bindingPoint;
} tiles;
and we want to use it on three separate shader programs:
//lets assing a binding point
ubIndex=glGetUniformBlockIndex(passes[5].ohjelmaID, "TILES");
glUniformBlockBinding(passes[5].ohjelmaID, ubIndex, tiles.bindingPoint);
ubIndex=glGetUniformBlockIndex(passes[6].ohjelmaID, "TILES");
glUniformBlockBinding(passes[6].ohjelmaID, ubIndex, tiles.bindingPoint);
ubIndex=glGetUniformBlockIndex(passes[7].ohjelmaID, "TILES");
glUniformBlockBinding(passes[7].ohjelmaID, ubIndex, tiles.bindingPoint);
//here we create a single unifrom block
glGetActiveUniformBlockiv(passes[5].ohjelmaID, ubIndex, GL_UNIFORM_BLOCK_DATA_SIZE, &uboSize2);
glGenBuffers(1, &uboBuffer2);
glBindBuffer(GL_UNIFORM_BUFFER, uboBuffer2);
glBufferData(GL_UNIFORM_BUFFER, uboSize2, NULL, GL_DYNAMIC_DRAW);
//this buffer is on a CPU side. We collect all uniform block data into it first and then send it to the GPU on a single call
cpuBuffer3=(GLubyte*) malloc(uboSize2);
//need to get a location to every struct item
const GLchar *muuttujat2[] = {"tiiliaX", "tiiliaY", "fmodX", "fmodY", "horizontalLevel", "verticalLevel"};
GLuint indeksit2[6];
glGetUniformIndices(passes[6].ohjelmaID, 6, muuttujat2, indeksit2);
glGetActiveUniformsiv(passes[6].ohjelmaID, 6, indeksit2, GL_UNIFORM_OFFSET, tiles.locations);
glBindBuffer(GL_UNIFORM_BUFFER, 0);
To fill the uniform block above, use the code below:
//bind the buffer and attach it to the binding point used by the TILES block
glBindBuffer(GL_UNIFORM_BUFFER, uboBuffer2);
glBindBufferBase(GL_UNIFORM_BUFFER, tiles.bindingPoint, uboBuffer2);
//tiles.locations[] is in the order the names were given to glGetUniformIndices:
//[0] tiiliaX, [1] tiiliaY, [2] fmodX, [3] fmodY, [4] horizontalLevel, [5] verticalLevel
memcpy(cpuBuffer3 + tiles.locations[0], &tiles.tiiliaX, sizeof(GLint));
memcpy(cpuBuffer3 + tiles.locations[1], &tiles.tiiliaY, sizeof(GLint));
memcpy(cpuBuffer3 + tiles.locations[2], &tiles.fmodX, sizeof(GLint));
memcpy(cpuBuffer3 + tiles.locations[3], &tiles.fmodY, sizeof(GLint));
//NOTE(review): copying the arrays in one piece assumes the block stores the floats
//back to back. That is not guaranteed for this layout; the actual spacing is reported
//by GL_UNIFORM_ARRAY_STRIDE and is presumably what differs between the two drivers.
memcpy(cpuBuffer3 + tiles.locations[4], tiles.horizontalLevel, sizeof tiles.horizontalLevel);
memcpy(cpuBuffer3 + tiles.locations[5], tiles.verticalLevel, sizeof tiles.verticalLevel);
//send the whole staging buffer to the GPU in one call
glBufferData( GL_UNIFORM_BUFFER, uboSize2, cpuBuffer3, GL_DYNAMIC_DRAW );
Okay, that was the original version, which works on NVIDIA but not on AMD. Next, take a look at the source which works on both NVIDIA and AMD:
NOTE: CODE BELOW WILL WORK ON BOTH NVIDIA AND AMD
In a shader program we have a uniform block
// TILES block, vector version: one ivec4 followed by two vec4 arrays.
// NOTE(review): std140 and shared both select the block's memory layout; when
// several such qualifiers are listed the later one takes effect, so this block
// presumably ends up `shared`. The host code queries the member offsets
// rather than assuming the std140 rules - confirm which layout is intended.
layout(std140, shared, column_major) uniform TILES {
ivec4 tiilet;
// Spelling fixed (was "horizontalLEvel"): the host looks this member up by the
// string "horizontalLevel", and uniform names are case sensitive.
vec4 horizontalLevel[MAKSIMIRIVI/BLOCK_SIZE+2];
vec4 verticalLevel[MAKSIMIRIVI/BLOCK_SIZE+2];
};
and its counterpart on the CPU side is
// CPU-side data for the vector version of the TILES uniform block,
// together with the bookkeeping needed to upload it.
struct TILES {
// Four floats for every vec4 element of the corresponding block array.
float horizontalLevel[4*(MAKSIMIRIVI/BLOCK_SIZE+2)];
float verticalLevel[4*(MAKSIMIRIVI/BLOCK_SIZE+2)];
// Uploaded into the ivec4 member "tiilet".
int tiilet[4];
// Byte offsets of the three block members, filled in with GL_UNIFORM_OFFSET.
GLint locations[3];
// Uniform buffer binding point passed to glUniformBlockBinding and glBindBufferBase.
GLuint bindingPoint;
// Values reported by GL_UNIFORM_SIZE for the three block members.
GLint sizes[3];
} tiles;
and we want to use it on three separate shader programs:
//lets assing a binding point
ubIndex=glGetUniformBlockIndex(passes[5].ohjelmaID, "TILES");
glUniformBlockBinding(passes[5].ohjelmaID, ubIndex, tiles.bindingPoint);
ubIndex=glGetUniformBlockIndex(passes[6].ohjelmaID, "TILES");
glUniformBlockBinding(passes[6].ohjelmaID, ubIndex, tiles.bindingPoint);
ubIndex=glGetUniformBlockIndex(passes[7].ohjelmaID, "TILES");
glUniformBlockBinding(passes[7].ohjelmaID, ubIndex, tiles.bindingPoint);
//here we create a single unifrom block
glGetActiveUniformBlockiv(passes[5].ohjelmaID, ubIndex, GL_UNIFORM_BLOCK_DATA_SIZE, &uboSize2);
glGenBuffers(1, &uboBuffer2);
glBindBuffer(GL_UNIFORM_BUFFER, uboBuffer2);
glBufferData(GL_UNIFORM_BUFFER, uboSize2, NULL, GL_DYNAMIC_DRAW);
//need to get a location to every struct item
const GLchar *muuttujat2[] = {"tiilet", "horizontalLevel", "verticalLevel"};
GLuint indeksit2[3];
glGetUniformIndices(passes[6].ohjelmaID, 3, muuttujat2, indeksit2);
glGetActiveUniformsiv(passes[6].ohjelmaID, 3, indeksit2, GL_UNIFORM_OFFSET, tiles.locations);
glGetActiveUniformsiv(passes[6].ohjelmaID, 3, indeksit2, GL_UNIFORM_SIZE, tiles.sizes);
glBindBuffer(GL_UNIFORM_BUFFER, 0);
To fill the uniform block above, use the code below:
glBindBuffer(GL_UNIFORM_BUFFER, uboBuffer2);
glBindBufferBase(GL_UNIFORM_BUFFER, tiles.bindingPoint, uboBuffer2);
glBufferSubData(GL_UNIFORM_BUFFER, tiles.locations[0], tiles.sizes[0]*4*sizeof(GLint), tiles.tiilet);
glBufferSubData(GL_UNIFORM_BUFFER, tiles.locations[1], tiles.sizes[1]*4*sizeof(GLfloat), tiles.horizontalLevel);
glBufferSubData(GL_UNIFORM_BUFFER, tiles.locations[2], tiles.sizes[2]*4*sizeof(GLfloat), tiles.verticalLevel);
Although the block declares the std140 layout, you cannot rely on it. Actually, the code above doesn’t need it; it’s just there to make AMD happy.
Have a nice day