Hello,
I’m just trying to play with compute shaders (GL 4.3) and I’m facing some issues with the way I’m filling my shader storage buffers.
This is my shader code:
#version 430 core
layout (local_size_x = 32) in;
layout(binding=0) buffer inputBuffer {
float input[];
};
// don't use std140 for this, I must understand why...
layout(binding=1) buffer outputBuffer {
float output[];
};
void main()
{
const uint offset = gl_GlobalInvocationID.x;
if(offset % 2 == 0)
output[offset] = input[offset];
else
output[offset] = -input[offset];
}
It is really simple: it takes a buffer as input and writes the same buffer to the output with every odd-indexed element negated.
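Just to make the expected result explicit, the output should match this trivial CPU loop (assuming the input is filled with 0..31, as in my init code below):
// Reference CPU version of what I expect the shader to compute
float in_data[32], out_data[32];
for (unsigned int i = 0; i < 32; ++i)
    in_data[i] = (float)i;
for (unsigned int i = 0; i < 32; ++i)
    out_data[i] = (i % 2 == 0) ? in_data[i] : -in_data[i];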
This is my GL code:
// cc -Wall -Wextra -g -std=c99 -lglut -lGLEW -lGL test_debug.c -o test_debug
#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <GL/glew.h>
#include <GL/gl.h>
#include <GL/freeglut.h>
struct State
{
GLuint buffers[2];
GLuint shader_program;
} state;
void debug
(GLenum source,
GLenum type,
GLuint id,
GLenum severity,
GLsizei length,
const GLchar* message,
const GLvoid* userParams)
{
printf("DEBUG: %s
", message);
}
char *read_file(char *filename)
{
FILE* fp = fopen(filename, "r");
assert(fp);
fseek(fp, 0, SEEK_END);
long size = ftell(fp);
fseek(fp, 0, SEEK_SET);
char * data = malloc(size + 1);
size_t r = fread(data, sizeof(char), size, fp);
assert(r == (size_t)size);
data[size] = '\0';
fclose(fp);
return data;
}
void init2()
{
// Compute shader creation
GLuint compute_shader = glCreateShader(GL_COMPUTE_SHADER);
const GLchar * csSrc = read_file("test.cs");
GLint size = strlen(csSrc);
glShaderSource(compute_shader, 1, &csSrc, &size);
glCompileShader(compute_shader);
// link of program
state.shader_program = glCreateProgram();
glAttachShader(state.shader_program, compute_shader);
glLinkProgram(state.shader_program);
// Display errors
char infoLog[10000];
glGetProgramInfoLog(state.shader_program, sizeof(infoLog), NULL, infoLog);
printf("%s
", infoLog);
// buffer creation (input and output)
glGenBuffers(2, state.buffers);
// fill the input buffer with numbers
float data[32];
for(unsigned int i = 0; i < 32; ++i)
data[i] = i;
glBindBuffer( GL_SHADER_STORAGE_BUFFER, state.buffers[0] );
glBufferData( GL_SHADER_STORAGE_BUFFER, sizeof(data), data, GL_DYNAMIC_DRAW );
// resize the output buffer
glBindBuffer(GL_SHADER_STORAGE_BUFFER, state.buffers[1]);
// XXX depending of the choice I use, I got right/wrong results
//GLenum choice = GL_STATIC_DRAW; // NO
//GLenum choice = GL_STATIC_READ; // NO
//GLenum choice = GL_STATIC_COPY; // NO
//GLenum choice = GL_DYNAMIC_DRAW; // NO
//GLenum choice = GL_DYNAMIC_READ; // YES
//GLenum choice = GL_DYNAMIC_COPY; // YES
//GLenum choice = GL_STREAM_DRAW; // NO
GLenum choice = GL_STREAM_READ; // YES
//GLenum choice = GL_STREAM_COPY; // YES
glBufferData( GL_SHADER_STORAGE_BUFFER, sizeof(data), NULL, choice);
glMemoryBarrier(GL_ALL_BARRIER_BITS);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
}
void renderScene2()
{
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, state.buffers[0]);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, state.buffers[1]);
glUseProgram(state.shader_program);
// Ugly memory sync because I don't know what I'm doing, I want to be sure that all my data are sent
glMemoryBarrier(GL_ALL_BARRIER_BITS);
glDispatchCompute(1, 1, 1);
// Ugly memory sync because I don't know what I'm doing, I want to be sure that my shader correctly modified the data
glMemoryBarrier(GL_ALL_BARRIER_BITS);
glUseProgram(0);
// Get the result back
float result[32];
// Ugly init of the result vector for debug
for(unsigned int i = 0; i < 32; ++i)
result[i] = -2;
glBindBuffer(GL_SHADER_STORAGE_BUFFER, state.buffers[1]);
glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(result), result);
// Get and display the results
for(unsigned int i = 0; i < 32; ++i)
printf("%f
", result[i]);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
}
int main(int argc, char **argv) {
// init GLUT and create Window
glutInit(&argc, argv);
//glutInitContextVersion(4, 3);
glutInitContextFlags(GLUT_CORE_PROFILE | GLUT_DEBUG);
glutInitDisplayMode(GLUT_DEPTH | GLUT_DOUBLE | GLUT_RGBA);
glutInitWindowSize(800, 600);
glutCreateWindow("Instance Test");
// This is for GLEW, to get access to all the extensions
glewInit();
glEnable(GL_DEBUG_OUTPUT);
glDebugMessageCallback(&debug, NULL);
printf("VERSION: %s
", glGetString(GL_VERSION));
printf("RENDERER: %s
", glGetString(GL_RENDERER));
printf("VENDOR: %s
", glGetString(GL_VENDOR));
GLint mcsms;
glGetIntegerv(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE, &mcsms);
printf("GL_MAX_COMPUTE_SHARED_MEMORY_SIZE: %d
", mcsms);
glutDisplayFunc(renderScene2);
init2();
renderScene2();
//glutMainLoop();
return 0;
}
I tried to add enough comments, but the main issue is that the result can change completely depending on the usage hint passed to glBufferData for the second (output) buffer. (See the GLenum choice lines: the ones marked NO do not work and the ones marked YES do.)
From what I read in the spec, the usage flag is only a hint and should not change the behaviour of the driver, so is this a driver bug? Or does the driver simply behave differently because of the hint, and that different behaviour exposes a bug in my code?
Please note that I put synchronisation everywhere because I’m not too sure where I actually need it.
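For reference, this is the minimal synchronisation I would expect to be sufficient if I read the spec correctly (just a sketch reusing the names from my code above, so it may well be wrong, hence my question): the upload with glBufferData is not a shader access, so I think no barrier is needed before the dispatch, and the read-back with glGetBufferSubData should only need GL_BUFFER_UPDATE_BARRIER_BIT after it.
// Sketch of the sync I believe should be enough (not what my code above does)
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, state.buffers[0]);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, state.buffers[1]);
glUseProgram(state.shader_program);
glDispatchCompute(1, 1, 1);
// make the shader writes to the output SSBO visible to glGetBufferSubData
glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
float result[32];
glBindBuffer(GL_SHADER_STORAGE_BUFFER, state.buffers[1]);
glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(result), result);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
glUseProgram(0);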
Thank you for your help.
Last but not least, I ran this on Arch Linux with the NVIDIA 304.15 driver on a GeForce 560 Ti.