Thanks for your comments!
Below is the complete source code of the vertex shader and the fragment shader. It is still far from being a working SSAO algorithm, but I have to understand the performance issue before I can go on. I have now tested it on another PC and get similar problems:
Vendor: NVIDIA Corporation
Renderer: NVS 4200M/PCI/SSE2
Version: 4.2.0
GLSL: 4.20 NVIDIA via Cg compiler
OpenGL context version parsed by GLFW: 4.2.0
Notice the line with “ind = i % 64”. If it is commented out, the shader takes 7.6ms to execute. With the line included, it takes 881ms! I measure the execution time with a GL_TIME_ELAPSED query. So the same problem occurs with Nvidia, and it is even worse (though both graphics cards are in the lower performance segment). The funny thing is that the GLSL compiler seems to decide that it can’t predict what happens in the presence of the %-operator. If I instead do “ind = i+1”, there is no performance loss. But that doesn’t give much randomness.
Using “num += int(step(dist, ref));” didn’t produce any difference, as you suspected. The measured times are very stable.
I have another shader that uses shadow maps with oversampling. It had the same problem when I used random samples, so I switched to using a very large set of samples instead. That is obviously not a satisfactory solution, as it takes a lot of time.
// Vertex shader for a full-screen pass. It is meant to be used when drawing
// two triangles whose input coordinates are 0,0 in one corner and 1,1 in the
// other, so that they cover the whole screen.
//
// Each array element is one piece of GLSL source. Every GLSL line ends with an
// explicit "\n" escape: a C string literal must not contain a raw line break,
// and the "#version" directive has to be terminated by a newline.
static const GLchar *vertexShaderSource[] = {
    "#version 330\n", // This corresponds to OpenGL 3.3
    "layout(location = 0) in vec2 vertex;\n",
    "out vec2 screen;\n", // Screen coordinate
    "void main(void)\n",
    "{\n",
    " gl_Position = vec4(vertex*2-1, 0, 1);\n", // Transform from interval 0 to 1, to interval -1 to 1.
    " screen = vertex;\n", // Copy position to the fragment shader. Only x and y are needed.
    "}\n",
};
// Fragment shader of the experimental SSAO pass (not a finished algorithm).
//
// For the current fragment it reads the world position from posTex and takes
// its distance to UBOCamera as the reference. It then reads SIZE neighbouring
// positions from posTex, offset by entries of the poissonDisk table, and counts
// how many of them are closer to the camera than the reference. If more than
// 70% of the samples are closer, 0.5 is written to "light"; otherwise the
// fragment is discarded.
//
// Layout notes:
//  - Adjacent string literals are concatenated by the C compiler; only the
//    commas separate array elements. The element boundaries are deliberately
//    left as they were, so the element count is unchanged.
//  - Every GLSL line that needs a line break carries an explicit "\n" escape,
//    because a C string literal must not contain a raw line break.
//  - The sampled position is stored in "samplePos" rather than "sample":
//    "sample" is a keyword in GLSL 4.00 and later, so avoiding it keeps the
//    shader compilable if the #version is ever raised.
static const GLchar *fragmentShaderSource[] = {
    "#version 330\n", // This corresponds to OpenGL 3.3
    "layout(std140) uniform GlobalData {"
    " mat4 UBOProjectionMatrix;"
    " mat4 UBOProjectionviewMatrix;"
    " mat4 UBOViewMatrix;"
    " vec4 UBOCamera;"
    " float UBOViewingDistance;"
    " int UBOPerformance;"
    " int UBODynamicshadows;"
    "};\n"
    "uniform sampler2D posTex;\n", // World position
    "uniform sampler2D normalTex;\n", // Normals
    "in vec2 screen;\n", // The screen position
    "layout(location = 0) out float light;\n",
    "vec4 worldPos;\n",
    "vec4 normal;\n",
    // Table of 64 sample offsets with components in the range 0..1.
    "const vec2 poissonDisk[64] = vec2[]("
    " vec2( 0.282571, 0.023957 ),"
    " vec2( 0.792657, 0.945738 ),"
    " vec2( 0.922361, 0.411756 ),"
    " vec2( 0.165838, 0.552995 ),"
    " vec2( 0.566027, 0.216651 ),"
    " vec2( 0.335398, 0.783654 ),"
    " vec2( 0.0190741, 0.318522 ),"
    " vec2( 0.647572, 0.581896 ),"
    " vec2( 0.916288, 0.0120243 ),"
    " vec2( 0.0278329, 0.866634 ),"
    " vec2( 0.398053, 0.4214 ),"
    " vec2( 0.00289926, 0.051149 ),"
    " vec2( 0.517624, 0.989044 ),"
    " vec2( 0.963744, 0.719901 ),"
    " vec2( 0.76867, 0.018128 ),"
    " vec2( 0.684194, 0.167302 ),"
    " vec2( 0.727103, 0.410871 ),"
    " vec2( 0.557482, 0.724143 ),"
    " vec2( 0.483352, 0.0527055 ),"
    " vec2( 0.162877, 0.351482 ),"
    " vec2( 0.959716, 0.180578 ),"
    " vec2( 0.140355, 0.112003 ),"
    " vec2( 0.796228, 0.223365 ),"
    " vec2( 0.187048, 0.787225 ),"
    " vec2( 0.55446, 0.35612 ),"
    " vec2( 0.449965, 0.640522 ),"
    " vec2( 0.438917, 0.194769 ),"
    " vec2( 0.791253, 0.565325 ),"
    " vec2( 0.719718, 0.794794 ),"
    " vec2( 0.0651875, 0.708609 ),"
    " vec2( 0.641987, 0.0233772 ),"
    " vec2( 0.376415, 0.944243 ),"
    " vec2( 0.827723, 0.723258 ),"
    " vec2( 0.968627, 0.884518 ),"
    " vec2( 0.263405, 0.458968 ),"
    " vec2( 0.985717, 0.559587 ),"
    " vec2( 0.0616169, 0.468612 ),"
    " vec2( 0.159154, 0.934782 ),"
    " vec2( 0.287301, 0.284768 ),"
    " vec2( 0.550066, 0.849391 ),"
    " vec2( 0.353587, 0.003296 ),"
    " vec2( 0.000671407, 0.582507 ),"
    " vec2( 0.850459, 0.461989 ),"
    " vec2( 0.526139, 0.640126 ),"
    " vec2( 0.786889, 0.487686 ),"
    " vec2( 0.164129, 0.02472 ),"
    " vec2( 0.517075, 0.90933 ),"
    " vec2( 0.316111, 0.663564 ),"
    " vec2( 0.09476, 0.895749 ),"
    " vec2( 0.298288, 0.195318 ),"
    " vec2( 0.427229, 0.7828 ),"
    " vec2( 0.734764, 0.266152 ),"
    " vec2( 0.0816065, 0.965972 ),"
    " vec2( 0.698935, 0.646352 ),"
    " vec2( 0.281899, 0.355144 ),"
    " vec2( 0.871334, 0.303171 ),"
    " vec2( 0.138249, 0.661214 ),"
    " vec2( 0.202399, 0.252449 ),"
    " vec2( 0.0734275, 0.399853 ),"
    " vec2( 0.786767, 0.660268 ),"
    " vec2( 0.933744, 0.508621 ),"
    " vec2( 0.398236, 0.0509049 ),"
    " vec2( 0.500473, 0.130253 ),"
    " vec2( 0.0332957, 0.526292 )"
    ");"
    "void main(void)\n",
    "{\n",
    " normal = texture(normalTex, screen);\n", // Not used
    " worldPos = texture(posTex, screen);\n",
    " float ref = distance(UBOCamera.xyz, worldPos.xyz);"
    " int num = 0;"
    " const int SIZE=20;"
    " const float p = 1.0/1900;\n" // Size of one pixel
    " for (int i=0; i<SIZE; i++) {"
    " int ind = i;"
    // Experiment toggle: enabling the next line is what triggers the large
    // slowdown that is being investigated.
//  " ind = i % 64;"
    // Map the table entry from 0..1 to -1..1 and scale it to 10 pixels.
    " vec2 sampleInd = screen + (2.0*poissonDisk[ind]-1.0)*p*10;"
    " vec3 samplePos = texture(posTex, sampleInd).xyz;\n"
    " float dist = distance(UBOCamera.xyz, samplePos);"
    " if (dist < ref) num++;\n"
    " }"
    " if (num > SIZE*0.7)"
    " light = 0.5;\n"
    " else {"
    " discard; return;"
    " }"
//  " light = worldPos.a;\n",
    "}\n",
};