PDA

View Full Version : Slow deferred shading



Zire
11-22-2007, 10:32 AM
Hello.

I have a problem trying to do deferred shading using framebuffer objects. So far I have done an implementation which uses three 128-bit framebuffers to store position, normal and color. The rendering is done in two steps. In the first step, geometric values are stored in the framebuffer objects (G-buffers), and in the second, those G-buffers are used as input to the light shader.
One polygon is used, and one light source. The problem is that I measure only around 20 FPS when rendering to 1024x1024 with a GeForce 7200.

Does anyone have any idea about why the rendering speed is so low?


#include <GL/glew.h>
#include <GL/glut.h>
#include <iostream>
#include <cmath>

// When defined, the frame-rate measurement based on gettimeofday() is compiled
// in (see the #ifdef LINUX sections); it requires <sys/time.h>.
#define LINUX

// GLUT callbacks and the camera helper; all are defined after main().
void reshapeWindow(int w, int h);
void setCamera(float angle);
void displayScreen();
void redrawRepeatedly();

// global variables
// Texture applied to the scene polygon during the geometry pass.
GLuint texture[1];
// Off-screen framebuffer object, its depth renderbuffer, and the four float
// textures attached as color attachments 0..3. The deferring fragment shader
// writes position to [0], texture color to [1] and normal + emissive to [2];
// [3] is attached but displayScreen() only selects three draw buffers.
GLuint framebuffer, renderbuffer, framebufferTexture[4];
// Uniform locations of the light shader. Only lightTexLoc[0..2] are assigned.
// NOTE(review): glGetUniformLocation returns GLint (-1 when the uniform is not
// active); these are declared GLuint -- consider switching to GLint.
GLuint lightTexLoc[4], lightSizeLoc, lightPosLoc;
GLuint lightDiffuseLoc, lightLinearAttLoc, lightQuadAttLoc;
// Program object used for the geometry (G-buffer fill) pass.
GLhandleARB deferringShader;
// Program object used for the per-light pass.
GLhandleARB lightShader;
// Number of times displayScreen() has run; drives the benchmark exit.
int numDisplays = 0;
// Not referenced anywhere in the visible code.
GLuint query[2];

#ifdef LINUX
#include <sys/time.h>

// Timestamps taken just before glutMainLoop() and when the benchmark ends.
timeval start, end;
#endif

// Width and height of the G-buffer textures and the depth renderbuffer.
int textureSize[2] = { 1024, 1024 };

int main(int argc, char *argv[]) {

// initiate glut
glutInit(&amp;argc, argv);
glutInitDisplayMode(GLUT_DOUBLE | GLUT_RGB | GLUT_DEPTH | GLUT_STENCIL);

glutInitWindowSize(512, 512);
glutInitWindowPosition(20, 20);
glutCreateWindow("Deferred Shader");

// initiate shaders
glewInit();

// fixa deferring vertex shader
const GLcharARB *deferringVertexCode = /*"void main(void) { gl_Position = ftransform(); }"; */
"varying vec3 position;" \
"varying vec3 normal;" \
"void main(void) {" \
" position = vec3(gl_ModelViewMatrix * gl_Vertex);" \
" normal = gl_NormalMatrix * gl_Normal;" \
" gl_TexCoord[0] = gl_MultiTexCoord0;" \
" gl_Position = ftransform();" \
"}";

GLhandleARB deferringVertexShader = glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB);
glShaderSourceARB(deferringVertexShader, 1, &amp;deferringVertexCode, (GLint*)0);
glCompileShaderARB(deferringVertexShader);

// fixa deferring fragment shader
const GLcharARB *deferringFragmentCode = /*"void main(void) { gl_FragData[0] = gl_FragData[1] = gl_FragData[2] = vec4(0.0); }"; */
"varying vec3 position;" \
"varying vec3 normal;" \
"uniform sampler2D texture;" \
"uniform float emissive;" \
"void main(void) {" \
" gl_FragData[0] = vec4(position, 1.0);" \
" gl_FragData[1] = texture2D(texture, gl_TexCoord[0].st);" \
" vec3 nnormal = normalize(normal);" \
" gl_FragData[2] = vec4(nnormal, emissive);" \
"}";

GLhandleARB deferringFragmentShader = glCreateShaderObjectARB(GL_FRAGMENT_SHADER_ARB);
glShaderSourceARB(deferringFragmentShader, 1, &amp;deferringFragmentCode, (GLint *)0);
glCompileShaderARB(deferringFragmentShader);

// set up the deferring shader
deferringShader = glCreateProgramObjectARB();
glAttachObjectARB(deferringShader, deferringVertexShader);
glAttachObjectARB(deferringShader, deferringFragmentShader);
glLinkProgramARB(deferringShader);

// fixa light vertex shader
const GLcharARB *lightVertexCode = /*"void main(void) { gl_Position = ftransform(); }"; */
"void main(void) {" \
" gl_Position = ftransform();" \
" gl_TexCoord[0] = gl_Position;" \
" gl_TexCoord[0].xyz = gl_Position.xyz / gl_Position.w;" \
" gl_TexCoord[0] = (gl_TexCoord[0] + 1.0) / 2.0;" \
"}";

GLhandleARB lightVertexShader = glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB);
glShaderSourceARB(lightVertexShader, 1, &amp;lightVertexCode, (GLint*)0);
glCompileShaderARB(lightVertexShader);

// fixa light fragment shader
const GLcharARB *lightFragmentCode = /*"void main(void){ gl_FragColor = vec4(0.0); }"; */
"uniform sampler2D positionTex;" \
"uniform sampler2D normalTex;" \
"uniform sampler2D diffuseTex;" \
"uniform vec3 lightPosition;" \
"uniform vec3 lightDiffuse;" \
"uniform float lightSize;" \
"uniform float linearAttenuation;" \
"uniform float quadAttenuation;" \
"void main(void) {" \
" vec2 coord = gl_TexCoord[0].st;" \
" vec4 diffuse = texture2D(diffuseTex, coord);" \
" vec3 normal = vec3(texture2D(normalTex, gl_TexCoord[0].st));" \
" vec3 position = vec3(texture2D(positionTex, gl_TexCoord[0].st));" \
" vec3 lightDir = lightPosition - position;" \
" float d = length(lightDir);" \

" if(d > lightSize)" \
" discard;" \
" vec3 h = normalize(lightDir - position);" \
" float dNL = dot(normal, normalize(lightDir));" \
" vec4 color = vec4(0.0, 0.0, 0.0, 1.0);" \
" if(dNL > 0.0) {" \
" float attenuation = (1.0 - d / lightSize);" \
" float attenuationSqr = attenuation * attenuation;" \
" attenuation = attenuation * linearAttenuation + attenuationSqr * quadAttenuation;" \
" color = diffuse * dNL;" \
" color += vec4(0.7, 0.7, 0.7, 1.0) * pow(max(0.0, dot(normal, h)), 20.0);" \
" color *= attenuation;" \
" }" \
" gl_FragColor = color * diffuse;" \
"}";

GLhandleARB lightFragmentShader = glCreateShaderObjectARB(GL_FRAGMENT_SHADER_ARB);
glShaderSourceARB(lightFragmentShader, 1, &amp;lightFragmentCode, (GLint *)0);
glCompileShaderARB(lightFragmentShader);

// fixa light shader
lightShader = glCreateProgramObjectARB();
glAttachObjectARB(lightShader, lightVertexShader);
glAttachObjectARB(lightShader, lightFragmentShader);
glLinkProgramARB(lightShader);

// check for errors
GLsizei logSize = 0;
GLcharARB msg[2000];
glGetInfoLogARB(deferringShader, 2000, &amp;logSize, msg);
std::cerr << msg << std::endl;
glGetInfoLogARB(lightShader, 2000, &amp;logSize, msg);
std::cerr << msg << std::endl;

// get uniform locations
lightTexLoc[0] = glGetUniformLocation(lightShader, "positionTex");
lightTexLoc[1] = glGetUniformLocation(lightShader, "diffuseTex");
lightTexLoc[2] = glGetUniformLocation(lightShader, "normalTex");
lightPosLoc = glGetUniformLocation(lightShader, "lightPosition");
lightSizeLoc = glGetUniformLocation(lightShader, "lightSize");
lightDiffuseLoc = glGetUniformLocation(lightShader,"lightDiffuse");
lightLinearAttLoc = glGetUniformLocation(lightShader, "linearAttenuation");
lightQuadAttLoc = glGetUniformLocation(lightShader, "quadAttenuation");

// set blending function for the light
glBlendFunc(GL_ONE, GL_ONE);

// set up camera
setCamera(0.0f);

// display handlers
glutReshapeFunc(reshapeWindow);
glutDisplayFunc(displayScreen);
glutIdleFunc(redrawRepeatedly);

// create texture
glGenTextures(1, texture);
glBindTexture(GL_TEXTURE_2D, texture[0]);
float texData[16 * 3] = {
.5, .4, .9, .5, .5, .9, .5, .6, .9, .5, .7, .9,
.7, .4, .0, .7, .5, .0, .7, .6, .0, .7, .7, .9,
.9, .4, .0, .9, .5, .0, .9, .6, .0, .9, .7, .9,
.2, .4, .0, .2, .5, .0, .2, .6, .0, .2, .7, .9 };

glTexImage2D(GL_TEXTURE_2D, 0, 3, 4, 4, 0, GL_RGB, GL_FLOAT, texData);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);

// set up framebuffers
glGenFramebuffersEXT(1, &amp;framebuffer);
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, framebuffer);

// conf. textures
glGenTextures(4, framebufferTexture);

glBindTexture(GL_TEXTURE_2D, framebufferTexture[0]);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA32F_ARB, textureSize[0], textureSize[1], 0, GL_RGBA, GL_FLOAT, 0);

glBindTexture(GL_TEXTURE_2D, framebufferTexture[1]);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA32F_ARB, textureSize[0], textureSize[1], 0, GL_RGBA, GL_FLOAT, 0);

glBindTexture(GL_TEXTURE_2D, framebufferTexture[2]);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA32F_ARB, textureSize[0], textureSize[1], 0, GL_RGBA, GL_FLOAT, 0);

glBindTexture(GL_TEXTURE_2D, framebufferTexture[3]);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA32F_ARB, textureSize[0], textureSize[1], 0, GL_RGBA, GL_FLOAT, 0);

// attach textures to framebuffer
glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, GL_TEXTURE_2D, framebufferTexture[0], 0);
glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT1_EXT, GL_TEXTURE_2D, framebufferTexture[1], 0);
glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT2_EXT, GL_TEXTURE_2D, framebufferTexture[2], 0);
glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT3_EXT, GL_TEXTURE_2D, framebufferTexture[3], 0);

// conf. renderbuffer
glGenRenderbuffersEXT(1, &amp;renderbuffer);

glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, renderbuffer);
glRenderbufferStorageEXT(GL_RENDERBUFFER_EXT, GL_DEPTH_COMPONENT32, textureSize[0], textureSize[1]);
glFramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT, GL_DEPTH_ATTACHMENT_EXT, GL_RENDERBUFFER_EXT, renderbuffer);

if(glCheckFramebufferStatusEXT(GL_FRAMEBUFFER_EXT) !=GL_FRAMEBUFFER_COMPLETE_EXT)
return -2;

#ifdef LINUX
gettimeofday(&amp;start, 0);
#endif

// done initiating
glutMainLoop();

return 0;
}

// Resets the current matrix to identity and places the camera on a circle of
// radius 5 in the y = 0 plane, at the given angle (radians), looking at the
// origin with +Y as the up direction.
void setCamera(float angle) {
    const float orbitRadius = 5;
    const float eyeX = cosf(angle) * orbitRadius;
    const float eyeZ = sinf(angle) * orbitRadius;

    // set up the camera
    glLoadIdentity();
    gluLookAt(eyeX, 0.0f, eyeZ,   // eye position
              0.0f, 0.0f, 0.0f,   // look-at target
              0.0f, 1.0f, 0.0f);  // up vector
}

// GLUT reshape callback: makes the viewport cover the whole window and
// rebuilds the perspective projection (45 degree field of view, near plane
// 0.1, far plane 10) for the new aspect ratio. Leaves MODELVIEW as the
// current matrix mode.
void reshapeWindow(int w, int h) {
    glViewport(0, 0, w, h);

    // A zero-height window would make w/h divide by zero; fall back to a
    // square aspect ratio in that case.
    const double aspect = (h > 0) ? (double)w / (double)h : 1.0;

    glMatrixMode(GL_PROJECTION);
    glLoadIdentity();
    gluPerspective(45.0, aspect, .1, 10.0);
    glMatrixMode(GL_MODELVIEW);
}

// GLUT idle callback: requests another redraw. Once more than 1000 frames
// have been displayed it prints the average frames per second measured since
// `start` (only when LINUX is defined) and terminates the program.
//
// Fixes relative to the posted code: the HTML-escaped "&amp;end" is restored
// to "&end", the elapsed time is computed in floating point so the
// microsecond part is not truncated, and a zero elapsed time no longer
// divides by zero.
void redrawRepeatedly() {
    glutPostRedisplay();

    if (numDisplays <= 1000)
        return;

#ifdef LINUX
    gettimeofday(&end, 0);

    // elapsed wall-clock time in milliseconds
    const double elapsedMs = (end.tv_sec - start.tv_sec) * 1000.0
                           + (end.tv_usec - start.tv_usec) / 1000.0;
    if (elapsedMs > 0.0)
        std::cout << numDisplays * 1000.0 / elapsedMs << std::endl;
#endif

    exit(0);
}

void displayScreen() {
// increase number of displays
numDisplays ++;

static float cameraAngle = 1.0f;
cameraAngle += .001;
setCamera(cameraAngle);

// render to the textures
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, framebuffer);

// set viewport to texture size
int viewport[4];
glGetIntegerv(GL_VIEWPORT, viewport);
glViewport(0, 0, textureSize[0], textureSize[1]);

// clear the buffers
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
glEnable(GL_DEPTH_TEST);

// set the four textures as multitargets
GLenum buffers[4] = { GL_COLOR_ATTACHMENT0_EXT,
GL_COLOR_ATTACHMENT1_EXT,
GL_COLOR_ATTACHMENT2_EXT,
GL_COLOR_ATTACHMENT3_EXT };
glDrawBuffers(3, buffers);

// set the deferring shader
glUseProgramObjectARB(deferringShader);

// *** draw scene (the polygon)

// set texture for the polygon
glBindTexture(GL_TEXTURE_2D, texture[0]);
glEnable(GL_TEXTURE_2D);

// rita ut en polygon
glBegin(GL_QUADS);
glNormal3f(-.5f, -.5f, 1.0f);
glTexCoord2f(0.0f, 0.0f);
glVertex3f(-1.0f, -1.0f, 0.0f);

glNormal3f(-.5f, .5f, 1.0f);
glTexCoord2f(0.0f, 1.0f);
glVertex3f(-1.0f, 1.0f, 0.0f);

glNormal3f(.5f, .5f, 1.0f);
glTexCoord2f(1.0f, 1.0f);
glVertex3f(1.0f, 1.0f, 0.0f);

glNormal3f(.5f, -.5f, 1.0f);
glTexCoord2f(1.0f, 0.0f);
glVertex3f(1.0f, -1.0f, 0.0f);
glEnd();

// tag bort texturen
glDisable(GL_TEXTURE_2D);

// *** ok, done with the scene

// restore viewport
glViewport(viewport[0], viewport[1], viewport[2], viewport[3]);

// *** ambient lighting
// use fixed pipeline
glUseProgramObjectARB(0);

// render to screen, back buffer
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, 0);
glDrawBuffer(GL_BACK);

// tag bort djuptestet
glDisable(GL_DEPTH_TEST);

// use the color texture for ambient lighting
glBindTexture(GL_TEXTURE_2D, framebufferTexture[1]);
glEnable(GL_TEXTURE_2D);

// modulate with color
glColor3f(.3f, .3f, .3f);

// change to no projection
glMatrixMode(GL_PROJECTION);
glPushMatrix(); // P_PUSH
glLoadIdentity(); // P_EYE

glMatrixMode(GL_MODELVIEW);
glPushMatrix(); // M_PUSH
glLoadIdentity(); // M_EYE

// draw a screen aligned polygon
glBegin(GL_QUADS);
glTexCoord2f(0.0f, 0.0f);
glVertex3f(-1.0f, -1.0f, .0f);

glTexCoord2f(1.0f, 0.0f);
glVertex3f(1.0f, -1.0f, .0f);

glTexCoord2f(1.0f, 1.0f);
glVertex3f(1.0f, 1.0f, 0.0f);

glTexCoord2f(0.0f, 1.0f);
glVertex3f(-1.0f, 1.0f, 0.0f);
glEnd();


// *** now add lights
// enable blending
glEnable(GL_BLEND);

// use the lightshader
glUseProgramObjectARB(lightShader);

// set the G-buffers
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, framebufferTexture[0]);

glActiveTexture(GL_TEXTURE1);
glBindTexture(GL_TEXTURE_2D, framebufferTexture[1]);

glActiveTexture(GL_TEXTURE2);
glBindTexture(GL_TEXTURE_2D, framebufferTexture[2]);

glUniform1i(lightTexLoc[0], 0);
glUniform1i(lightTexLoc[1], 1);
glUniform1i(lightTexLoc[2], 2);

// set light values
static float lightAngle = 0.0f;
lightAngle -= .1;

glUniform3f(lightPosLoc, cosf(lightAngle) * 3.0f, .0f, sinf(lightAngle) * 3.0f);
glUniform1f(lightSizeLoc, 10.0f);
glUniform3f(lightDiffuseLoc, .7f, .7f, .7f);
glUniform1f(lightLinearAttLoc, 1.0f);
glUniform1f(lightQuadAttLoc, .0f);

// draw a screen aligned polygon
glBegin(GL_QUADS);
glTexCoord2f(0.0f, 0.0f);
glVertex3f(-1.0f, -1.0f, .0f);

glTexCoord2f(1.0f, 0.0f);
glVertex3f(1.0f, -1.0f, .0f);

glTexCoord2f(1.0f, 1.0f);
glVertex3f(1.0f, 1.0f, 0.0f);

glTexCoord2f(0.0f, 1.0f);
glVertex3f(-1.0f, 1.0f, 0.0f);
glEnd();

glActiveTexture(GL_TEXTURE0);

// disable blending
glDisable(GL_BLEND);

// change to use of perspective
glPopMatrix(); // M_POP

glDisable(GL_TEXTURE_2D);

glMatrixMode(GL_PROJECTION);
glPopMatrix(); // P_POP
glMatrixMode(GL_MODELVIEW);

// display the result
glutSwapBuffers();
}

(The code should be system independent if you remove #define LINUX on line 6, otherwise it uses sys/time.h.)

Zengar
11-22-2007, 10:53 AM
Just an educated guess: 7200 is a slow card :) Did you try other deferred rendering demos (one from nvidia, for instance)? How do they perform?

ZbuffeR
11-22-2007, 11:08 AM
Don't know much about deferred shading, but it has an important performance cost.
1024x1024 is quite a lot of bandwidth with so many buffers, maybe you are fillrate limited.

Vexator
11-22-2007, 01:47 PM
http://www.opengl.org/discussion_boards/ubbthreads.php?ubb=showflat&Number=230331&fpart=1

Zire
11-23-2007, 05:47 AM
Thanks for the replies.

I thought that the GeForce 7200 was quite OK. But I will try to find demos for deferred shading that run under Linux. I didn't find any from Nvidia, though.

Yes, I am afraid it could be fillrate limited as the speed is not improved much by almost empty shaders. Is there any way this could be verified?

I read that interesting thread about improving deferred shading. Unfortunately it's only about improving quality (at cost of speed) and for now it's only the speed I'm concerned about.

Jan
11-23-2007, 06:10 AM
To check whether you are fill-rate limited, simply reduce your window size (resolution). If it becomes faster, that's your bottleneck.

Jan.

Zire
11-23-2007, 06:25 AM
Thank you. OK, then that probably is the bottleneck (somewhere between 512x512 and 1024x1024 pixels resolution). I guess there is not much to do about that (except getting a faster graphics card that can better handle the bandwidth).