Slow deferred shading

Hello.

I have a problem with deferred shading using framebuffer objects. So far I have an implementation that uses three 128-bit (RGBA, 32-bit float per channel) render targets to store position, normal and color. The rendering is done in two steps. In the first step the geometric values are written to the render targets (G-buffers), and in the second step those G-buffers are used as input to the light shader.
The scene consists of a single polygon and a single light source. The problem is that I measure only about 20 FPS when rendering at 1024x1024 on a GeForce 7200.

Does anyone have any idea about why the rendering speed is so low?

#include <GL/glew.h>
#include <GL/glut.h>
#include <cmath>
#include <cstdlib>
#include <iostream>

#define LINUX

void reshapeWindow(int w, int h);
void setCamera(float angle);
void displayScreen();
void redrawRepeatedly();

// global variables
GLuint texture[1];
GLuint framebuffer, renderbuffer, framebufferTexture[4];
GLuint lightTexLoc[4], lightSizeLoc, lightPosLoc;
GLuint lightDiffuseLoc, lightLinearAttLoc, lightQuadAttLoc;
GLhandleARB deferringShader;
GLhandleARB lightShader;
int numDisplays = 0;
GLuint query[2];

#ifdef LINUX
#include <sys/time.h>

timeval start, end;
#endif

int textureSize[2] = { 1024, 1024 };

int main(int argc, char *argv[]) {
	
	// initiate glut
	glutInit(&argc, argv);
	glutInitDisplayMode(GLUT_DOUBLE | GLUT_RGB | GLUT_DEPTH | GLUT_STENCIL);

	glutInitWindowSize(512, 512);
	glutInitWindowPosition(20, 20);
	glutCreateWindow("Deferred Shader");

	// initiate shaders
	glewInit();
	
	// fixa deferring vertex shader
	const GLcharARB *deferringVertexCode = /*"void main(void) { gl_Position = ftransform(); }"; */
		"varying vec3 position;" \
		"varying vec3 normal;" \
		"void main(void) {" \
		"	position = vec3(gl_ModelViewMatrix * gl_Vertex);" \
		"	normal = gl_NormalMatrix * gl_Normal;" \
		"	gl_TexCoord[0] = gl_MultiTexCoord0;" \
		"	gl_Position = ftransform();" \
		"}";

	GLhandleARB deferringVertexShader = glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB);
	glShaderSourceARB(deferringVertexShader, 1, &deferringVertexCode, (GLint*)0);
	glCompileShaderARB(deferringVertexShader);

	// fixa deferring fragment shader
	const GLcharARB *deferringFragmentCode = /*"void main(void) { gl_FragData[0] = gl_FragData[1] = gl_FragData[2] = vec4(0.0); }"; */
		"varying vec3 position;" \
		"varying vec3 normal;" \
		"uniform sampler2D texture;" \
		"uniform float emissive;" \
		"void main(void) {" \
		"	gl_FragData[0] = vec4(position, 1.0);" \
		"	gl_FragData[1] = texture2D(texture, gl_TexCoord[0].st);" \
		"	vec3 nnormal = normalize(normal);" \
		"	gl_FragData[2] = vec4(nnormal, emissive);" \
		"}";

	GLhandleARB deferringFragmentShader = glCreateShaderObjectARB(GL_FRAGMENT_SHADER_ARB);
	glShaderSourceARB(deferringFragmentShader, 1, &deferringFragmentCode, (GLint *)0);
	glCompileShaderARB(deferringFragmentShader);

	// set up the deferring shader
	deferringShader = glCreateProgramObjectARB();
	glAttachObjectARB(deferringShader, deferringVertexShader);
	glAttachObjectARB(deferringShader, deferringFragmentShader);
	glLinkProgramARB(deferringShader);
	
	// fixa light vertex shader
	const GLcharARB *lightVertexCode = /*"void main(void) { gl_Position = ftransform(); }"; */
		"void main(void) {" \
		"	gl_Position = ftransform();" \
		"	gl_TexCoord[0] = gl_Position;" \
		"	gl_TexCoord[0].xyz = gl_Position.xyz / gl_Position.w;" \
		"	gl_TexCoord[0] = (gl_TexCoord[0] + 1.0) / 2.0;" \
		"}";

	GLhandleARB lightVertexShader = glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB);
	glShaderSourceARB(lightVertexShader, 1, &lightVertexCode, (GLint*)0);
	glCompileShaderARB(lightVertexShader);

	// fixa light fragment shader
	const GLcharARB *lightFragmentCode = /*"void main(void){ gl_FragColor = vec4(0.0); }"; */
		"uniform sampler2D positionTex;" \
		"uniform sampler2D normalTex;" \
		"uniform sampler2D diffuseTex;" \
		"uniform vec3 lightPosition;" \
		"uniform vec3 lightDiffuse;" \
		"uniform float lightSize;" \
		"uniform float linearAttenuation;" \
		"uniform float quadAttenuation;" \
		"void main(void) {" \
		"	vec2 coord = gl_TexCoord[0].st;" \
		"	vec4 diffuse = texture2D(diffuseTex, coord);" \
		"	vec3 normal = vec3(texture2D(normalTex, gl_TexCoord[0].st));" \
		"	vec3 position = vec3(texture2D(positionTex, gl_TexCoord[0].st));" \
		"	vec3 lightDir = lightPosition - position;" \
		"	float d = length(lightDir);" \

		"	if(d > lightSize)" \
		"		discard;" \
		"	vec3 h = normalize(lightDir - position);" \
		"	float dNL = dot(normal, normalize(lightDir));" \
		"	vec4 color = vec4(0.0, 0.0, 0.0, 1.0);" \
		"	if(dNL > 0.0) {" \
		"		float attenuation = (1.0 - d / lightSize);" \
		"		float attenuationSqr = attenuation * attenuation;" \
		"		attenuation = attenuation * linearAttenuation + attenuationSqr * quadAttenuation;" \
		"		color = diffuse * dNL;" \
		"		color += vec4(0.7, 0.7, 0.7, 1.0) * pow(max(0.0, dot(normal, h)), 20.0);" \
		"		color *= attenuation;" \
		"	}" \
		"	gl_FragColor = color * diffuse;" \
		"}";

	GLhandleARB lightFragmentShader = glCreateShaderObjectARB(GL_FRAGMENT_SHADER_ARB);
	glShaderSourceARB(lightFragmentShader, 1, &lightFragmentCode, (GLint *)0);
	glCompileShaderARB(lightFragmentShader);

	// fixa light shader
	lightShader = glCreateProgramObjectARB();
	glAttachObjectARB(lightShader, lightVertexShader);
	glAttachObjectARB(lightShader, lightFragmentShader);
	glLinkProgramARB(lightShader);

	// check for errors
	GLsizei logSize = 0;
	GLcharARB msg[2000];
	glGetInfoLogARB(deferringShader, 2000, &logSize, msg);
	std::cerr << msg << std::endl;
	glGetInfoLogARB(lightShader, 2000, &logSize, msg);
	std::cerr << msg << std::endl;

	// get uniform locations
	lightTexLoc[0] = glGetUniformLocation(lightShader, "positionTex");
	lightTexLoc[1] = glGetUniformLocation(lightShader, "diffuseTex");
	lightTexLoc[2] = glGetUniformLocation(lightShader, "normalTex");
	lightPosLoc = glGetUniformLocation(lightShader, "lightPosition");
	lightSizeLoc = glGetUniformLocation(lightShader, "lightSize");
	lightDiffuseLoc = glGetUniformLocation(lightShader,"lightDiffuse");
	lightLinearAttLoc = glGetUniformLocation(lightShader, "linearAttenuation");
	lightQuadAttLoc = glGetUniformLocation(lightShader, "quadAttenuation");

	// set blending function for the light
	glBlendFunc(GL_ONE, GL_ONE);

	// set up camera
	setCamera(0.0f);

	// display handlers
	glutReshapeFunc(reshapeWindow);
	glutDisplayFunc(displayScreen);
	glutIdleFunc(redrawRepeatedly);

	// create texture
	glGenTextures(1, texture);
	glBindTexture(GL_TEXTURE_2D, texture[0]);
	float texData[16 * 3] = {
		.5, .4, .9,  .5, .5, .9,  .5, .6, .9,  .5, .7, .9,
		.7, .4, .0,  .7, .5, .0,  .7, .6, .0,  .7, .7, .9,
		.9, .4, .0,  .9, .5, .0,  .9, .6, .0,  .9, .7, .9,
		.2, .4, .0,  .2, .5, .0,  .2, .6, .0,  .2, .7, .9 };

	glTexImage2D(GL_TEXTURE_2D, 0, 3, 4, 4, 0, GL_RGB, GL_FLOAT, texData);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);

	// set up framebuffers
	glGenFramebuffersEXT(1, &framebuffer);
	glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, framebuffer);

	// conf. textures
	glGenTextures(4, framebufferTexture);

	glBindTexture(GL_TEXTURE_2D, framebufferTexture[0]);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
	glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA32F_ARB, textureSize[0], textureSize[1], 0, GL_RGBA, GL_FLOAT, 0);

	glBindTexture(GL_TEXTURE_2D, framebufferTexture[1]);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
	glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA32F_ARB, textureSize[0], textureSize[1], 0, GL_RGBA, GL_FLOAT, 0);

	glBindTexture(GL_TEXTURE_2D, framebufferTexture[2]);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
	glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA32F_ARB, textureSize[0], textureSize[1], 0, GL_RGBA, GL_FLOAT, 0);

	glBindTexture(GL_TEXTURE_2D, framebufferTexture[3]);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
	glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA32F_ARB, textureSize[0], textureSize[1], 0, GL_RGBA, GL_FLOAT, 0);

	// attach textures to framebuffer
	glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, GL_TEXTURE_2D, framebufferTexture[0], 0);
	glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT1_EXT, GL_TEXTURE_2D, framebufferTexture[1], 0);
	glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT2_EXT, GL_TEXTURE_2D, framebufferTexture[2], 0);
	glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT3_EXT, GL_TEXTURE_2D, framebufferTexture[3], 0);

	// conf. renderbuffer
	glGenRenderbuffersEXT(1, &renderbuffer);

	glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, renderbuffer);
	glRenderbufferStorageEXT(GL_RENDERBUFFER_EXT, GL_DEPTH_COMPONENT32, textureSize[0], textureSize[1]);
	glFramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT, GL_DEPTH_ATTACHMENT_EXT, GL_RENDERBUFFER_EXT, renderbuffer);

	if(glCheckFramebufferStatusEXT(GL_FRAMEBUFFER_EXT)!=GL_FRAMEBUFFER_COMPLETE_EXT)
		return -2;

#ifdef LINUX
	gettimeofday(&start, 0);
#endif

	// done initiating
	glutMainLoop();

	return 0;
}

// Replaces the current matrix with a look-at transform.
// The eye sits on a circle of radius 5 in the y = 0 plane, at `angle`
// radians, looks at the origin and uses +Y as the up direction.
void setCamera(float angle) {
	const float eyeX = 5 * cosf(angle);
	const float eyeZ = 5 * sinf(angle);

	// set up the camera
	glLoadIdentity();
	gluLookAt(eyeX, 0.0f, eyeZ,		// eye position
			0.0f, 0.0f, 0.0f,		// look-at point
			0.0f, 1.0f, 0.0f);		// up vector
}

// GLUT reshape callback: makes the viewport cover the whole window and
// rebuilds the perspective projection for the new aspect ratio.
// Leaves GL_MODELVIEW as the current matrix mode.
//
// w, h: new window size in pixels.
void reshapeWindow(int w, int h) {
	// A window can be reported with zero height (e.g. when collapsed);
	// clamp it so the aspect ratio below never divides by zero.
	if(h <= 0)
		h = 1;

	glViewport(0, 0, w, h);

	glMatrixMode(GL_PROJECTION);
		glLoadIdentity();
		gluPerspective(45.0, static_cast<double>(w) / static_cast<double>(h), .1, 10.0);
	glMatrixMode(GL_MODELVIEW);
}

// GLUT idle callback: requests another redraw, and once more than 1000
// frames have been displayed prints the average frames per second
// (only when LINUX is defined) and terminates the program.
void redrawRepeatedly() {
	glutPostRedisplay();

	if(numDisplays <= 1000)
		return;

#ifdef LINUX
	gettimeofday(&end, 0);

	// Elapsed wall-clock time in milliseconds, kept in floating point so the
	// sub-millisecond part is not truncated away.
	const double elapsedMs = 1000.0 * (end.tv_sec - start.tv_sec)
		+ (end.tv_usec - start.tv_usec) / 1000.0;

	// skip the report rather than divide by zero if no time was measured
	if(elapsedMs > 0.0)
		std::cout << numDisplays * 1000.0 / elapsedMs << std::endl;
#endif

	std::exit(0);
}

void displayScreen() {
	// increase number of displays
	numDisplays ++;

	static float cameraAngle = 1.0f;
	cameraAngle += .001;
	setCamera(cameraAngle);

	// render to the textures
	glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, framebuffer);

	// set viewport to texture size
	int viewport[4];
	glGetIntegerv(GL_VIEWPORT, viewport);
	glViewport(0, 0, textureSize[0], textureSize[1]);

	// clear the buffers
	glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
	glEnable(GL_DEPTH_TEST);

	// set the four textures as multitargets
	GLenum buffers[4] = { GL_COLOR_ATTACHMENT0_EXT,
		GL_COLOR_ATTACHMENT1_EXT,
		GL_COLOR_ATTACHMENT2_EXT,
		GL_COLOR_ATTACHMENT3_EXT };
	glDrawBuffers(3, buffers);
	
	// set the deferring shader
	glUseProgramObjectARB(deferringShader);

	// *** draw scene (the polygon)
	
	// set texture for the polygon
	glBindTexture(GL_TEXTURE_2D, texture[0]);
	glEnable(GL_TEXTURE_2D);

	// rita ut en polygon
	glBegin(GL_QUADS);
		glNormal3f(-.5f, -.5f, 1.0f);
		glTexCoord2f(0.0f, 0.0f);
		glVertex3f(-1.0f, -1.0f, 0.0f);

		glNormal3f(-.5f, .5f, 1.0f);
		glTexCoord2f(0.0f, 1.0f);
		glVertex3f(-1.0f, 1.0f, 0.0f);

		glNormal3f(.5f, .5f, 1.0f);
		glTexCoord2f(1.0f, 1.0f);
		glVertex3f(1.0f, 1.0f, 0.0f);

		glNormal3f(.5f, -.5f, 1.0f);
		glTexCoord2f(1.0f, 0.0f);
		glVertex3f(1.0f, -1.0f, 0.0f);
	glEnd();

	// tag bort texturen
	glDisable(GL_TEXTURE_2D);

	// *** ok, done with the scene

	// restore viewport
	glViewport(viewport[0], viewport[1], viewport[2], viewport[3]);

	// *** ambient lighting
	// use fixed pipeline
	glUseProgramObjectARB(0);

	// render to screen, back buffer
	glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, 0);
	glDrawBuffer(GL_BACK);

	// tag bort djuptestet
	glDisable(GL_DEPTH_TEST);

	// use the color texture for ambient lighting
	glBindTexture(GL_TEXTURE_2D, framebufferTexture[1]);
	glEnable(GL_TEXTURE_2D);

	// modulate with color
	glColor3f(.3f, .3f, .3f);

	// change to no projection
	glMatrixMode(GL_PROJECTION);
	glPushMatrix();	// P_PUSH
		glLoadIdentity(); // P_EYE
		
		glMatrixMode(GL_MODELVIEW);
		glPushMatrix();	// M_PUSH
			glLoadIdentity(); // M_EYE

			// draw a screen aligned polygon
			glBegin(GL_QUADS);
				glTexCoord2f(0.0f, 0.0f);
				glVertex3f(-1.0f, -1.0f, .0f);

				glTexCoord2f(1.0f, 0.0f);
				glVertex3f(1.0f, -1.0f, .0f);

				glTexCoord2f(1.0f, 1.0f);
				glVertex3f(1.0f, 1.0f, 0.0f);

				glTexCoord2f(0.0f, 1.0f);
				glVertex3f(-1.0f, 1.0f, 0.0f);
			glEnd();
		

			// *** now add lights
			// enable blending
			glEnable(GL_BLEND);

			// use the lightshader
			glUseProgramObjectARB(lightShader);

			// set the G-buffers
			glActiveTexture(GL_TEXTURE0);
			glBindTexture(GL_TEXTURE_2D, framebufferTexture[0]);

			glActiveTexture(GL_TEXTURE1);
			glBindTexture(GL_TEXTURE_2D, framebufferTexture[1]);

			glActiveTexture(GL_TEXTURE2);
			glBindTexture(GL_TEXTURE_2D, framebufferTexture[2]);

			glUniform1i(lightTexLoc[0], 0);
			glUniform1i(lightTexLoc[1], 1);
			glUniform1i(lightTexLoc[2], 2);

			// set light values
			static float lightAngle = 0.0f;
			lightAngle -= .1;

			glUniform3f(lightPosLoc, cosf(lightAngle) * 3.0f, .0f, sinf(lightAngle) * 3.0f);
			glUniform1f(lightSizeLoc, 10.0f);
			glUniform3f(lightDiffuseLoc, .7f, .7f, .7f);
			glUniform1f(lightLinearAttLoc, 1.0f);
			glUniform1f(lightQuadAttLoc, .0f);

			// draw a screen aligned polygon
			glBegin(GL_QUADS);
				glTexCoord2f(0.0f, 0.0f);
				glVertex3f(-1.0f, -1.0f, .0f);

				glTexCoord2f(1.0f, 0.0f);
				glVertex3f(1.0f, -1.0f, .0f);

				glTexCoord2f(1.0f, 1.0f);
				glVertex3f(1.0f, 1.0f, 0.0f);

				glTexCoord2f(0.0f, 1.0f);
				glVertex3f(-1.0f, 1.0f, 0.0f);
			glEnd();

			glActiveTexture(GL_TEXTURE0);

			// disable blending
			glDisable(GL_BLEND);

	// change to use of perspective
			glPopMatrix(); // M_POP

		glDisable(GL_TEXTURE_2D);

		glMatrixMode(GL_PROJECTION);
	glPopMatrix(); // P_POP
	glMatrixMode(GL_MODELVIEW);

	// display the result
	glutSwapBuffers();
}

(The code should be system independent if you remove the `#define LINUX` line, which is line 6 of the code above; otherwise it uses sys/time.h.)

Just an educated guess: 7200 is a slow card :slight_smile: Did you try other deferred rendering demos (one from nvidia, for instance)? How do they perform?

Don’t know much about deferred shading, but it has an important performance cost.
1024x1024 is quite a lot of bandwidth with so many buffers, maybe you are fillrate limited.

http://www.opengl.org/discussion_boards/ubbthreads.php?ubb=showflat&Number=230331&fpart=1

Thanks for the replies.

I thought that the GeForce 7200 was quite OK. But I will try to find demos for deferred shading that can be run under Linux. I didn’t find any from Nvidia, though.

Yes, I am afraid it could be fillrate limited as the speed is not improved much by almost empty shaders. Is there any way this could be verified?

I read that interesting thread about improving deferred shading. Unfortunately it’s only about improving quality (at cost of speed) and for now it’s only the speed I’m concerned about.

To check, whether you are fill-rate limited, simply reduce your window-size (resolution). If it becomes faster, that’s your bottleneck.

Jan.

Thank you. OK, then that probably is the bottleneck (it kicks in somewhere between 512x512 and 1024x1024 pixels resolution). I guess there is not much to do about that, other than getting a faster graphics card that can better handle the bandwidth.