PDA

View Full Version : FBO and early Z culling



Olivier B.
05-24-2007, 06:21 AM
Hi,

I'm having some trouble using early Z culling in my program. I tried this test (http://fytos.com/uploads/gpu/zbufferExampleFBO.zip), according to this topic (http://www.gpgpu.org/phpBB2/viewtopic.php?p=10608&sid=6bc2976f46f1054aa51b40ce26b88b22), to see whether early Z culling runs on my Quadro FX 3450/4000 SDI, and it works fine.

I built the following test to try enabling early Z culling in my program.

This is the render function:

static bool dbgInit = false, dbgZPass = false, dbgCompute = true;

/******** Init pass ********/
testFBOTarg->beginDraw();
testShaderInitPass.enableShader();

glEnable(GL_TEXTURE_RECTANGLE_NV);
glBindTexture(GL_TEXTURE_RECTANGLE_NV, testSrcBufferID);

glBegin(GL_QUADS);
glTexCoord2i(gridX, 0);
glVertex3f(0.0f, 0.0f, 0.0f);

glTexCoord2i(gridX, gridZ);
glVertex3f(0.0f, 0.0f, 1.0f);

glTexCoord2i(0, gridZ);
glVertex3f(1.0f, 0.0f, 1.0f);

glTexCoord2i(0, 0);
glVertex3f(1.0f, 0.0f, 0.0f);
glEnd();

glDisable(GL_TEXTURE_RECTANGLE_NV);

testShaderInitPass.disableShader();
testFBOTarg->endDraw();
/***************************/

if(dbgInit)
{
float *tmpD = new float[testFBOTarg->getWidth() * testFBOTarg->getHeight() * 4];
glBindTexture(GL_TEXTURE_RECTANGLE_NV, testFBOTarg->getTexAttach(GL_COLOR_ATTACHMENT0_EXT));
glGetTexImage(GL_TEXTURE_RECTANGLE_NV, 0, GL_RGBA, GL_FLOAT, tmpD);
imdebug("rgba b=32f w=%d h=%d %p", testFBOTarg->getWidth(), testFBOTarg->getHeight(), tmpD);
dbgInit = false;
delete[] tmpD;
}

/******** Switch FBO ********/
PXVFramebufferObject *tmpFBO = testFBOSrc;
testFBOSrc = testFBOTarg;
testFBOTarg = tmpFBO;
GLuint tmpID = testSrcBufferID;
testSrcBufferID = testTargBufferID;
testTargBufferID = tmpID;
/****************************/

/******** Early Z-Culling init pass ********/
testFBOTarg->beginDraw(false);
testShaderZPass.enableShader();

glEnable(GL_TEXTURE_RECTANGLE_NV);
glBindTexture(GL_TEXTURE_RECTANGLE_NV, testSrcBufferID);

glBegin(GL_QUADS);
glTexCoord2i(gridX, 0);
glVertex3f(0.0f, 0.0f, 0.0f);

glTexCoord2i(gridX, gridZ);
glVertex3f(0.0f, 0.0f, 1.0f);

glTexCoord2i(0, gridZ);
glVertex3f(1.0f, 0.0f, 1.0f);

glTexCoord2i(0, 0);
glVertex3f(1.0f, 0.0f, 0.0f);
glEnd();

glDisable(GL_TEXTURE_RECTANGLE_NV);

testShaderZPass.disableShader();
testFBOTarg->endDraw();
/*******************************************/

if(dbgZPass)
{
testFBOTarg->bindFBO();
imdebugDepthf(0, 0, testFBOTarg->getWidth(), testFBOTarg->getHeight());
dbgZPass = false;
testFBOTarg->unbindFBO();
}

/******** Switch FBO ********/
tmpFBO = testFBOSrc;
testFBOSrc = testFBOTarg;
testFBOTarg = tmpFBO;
tmpID = testSrcBufferID;
testSrcBufferID = testTargBufferID;
testTargBufferID = tmpID;
/****************************/

/******** Compute pass ********/
testFBOTarg->beginDraw(false, false);
testShaderComputePass.enableShader();

glEnable(GL_TEXTURE_RECTANGLE_NV);
glBindTexture(GL_TEXTURE_RECTANGLE_NV, testSrcBufferID);

glBegin(GL_QUADS);
glTexCoord2i(gridX, 0);
glVertex3f(0.0f, 0.0f, 0.0f);

glTexCoord2i(gridX, gridZ);
glVertex3f(0.0f, 0.0f, 1.0f);

glTexCoord2i(0, gridZ);
glVertex3f(1.0f, 0.0f, 1.0f);

glTexCoord2i(0, 0);
glVertex3f(1.0f, 0.0f, 0.0f);
glEnd();

glDisable(GL_TEXTURE_RECTANGLE_NV);

testShaderComputePass.disableShader();
testFBOTarg->endDraw();
/******************************/

if(dbgCompute)
{
float *tmpD = new float[testFBOTarg->getWidth() * testFBOTarg->getHeight() * 4];
glBindTexture(GL_TEXTURE_RECTANGLE_NV, testFBOTarg->getTexAttach(GL_COLOR_ATTACHMENT0_EXT));
glGetTexImage(GL_TEXTURE_RECTANGLE_NV, 0, GL_RGBA, GL_FLOAT, tmpD);
imdebug("rgba b=32f w=%d h=%d %p", testFBOTarg->getWidth(), testFBOTarg->getHeight(), tmpD);
dbgCompute = false;
delete[] tmpD;
}

/******** Switch FBO ********/
tmpFBO = testFBOSrc;
testFBOSrc = testFBOTarg;
testFBOTarg = tmpFBO;
tmpID = testSrcBufferID;
testSrcBufferID = testTargBufferID;
testTargBufferID = tmpID;
/****************************/
Here is the prototype of the FBO's beginDraw function:

inline void beginDraw(bool clearColor = true, bool clearDepth = true);

And here are the different fragment programs used in the test:

Init pass
uniform samplerRect src;

// Init pass: does not read `src`; every fragment gets the same constant colour.
void main()
{
    gl_FragColor = vec4(0.15, 0.15, 0.75, 1.0);
}

Z pass
uniform samplerRect src;

// Z pass: fetches the source texel at this fragment's texture coordinate and
// would copy it to the output colour.
void main()
{
vec4 inputVal = textureRect(src, gl_TexCoord[0].st);
vec4 outputVal = inputVal;

// Unconditional discard: while this statement is active the fragment is
// dropped, so the colour write below is never reached. It is the switch
// that is commented/uncommented for the test.
discard;

gl_FragColor = outputVal;
}

Compute pass
uniform samplerRect src;

// Compute pass: starts from the source texel and overwrites the red channel
// with 1.0 where the integer part of the s texture coordinate is even and
// 0.0 where it is odd. The assignment is repeated 100 times.
void main()
{
    vec4 inputVal = textureRect(src, gl_TexCoord[0].st);
    vec4 outputVal = inputVal;

    for(int i = 0; i < 100; i++)
    {
        // Standard GLSL has no C-style casts and (in 1.10/1.20) reserves
        // the % operator, so the parity test uses floor() and mod().
        outputVal.r = (mod(floor(gl_TexCoord[0].s), 2.0) == 0.0) ? 1.0 : 0.0;
    }

    gl_FragColor = outputVal;
}
The init pass is there only because the real program has an init pass.
Commenting or uncommenting "discard" in the Z pass controls which fragments are processed in the compute pass.
The compute pass only performs a time-consuming operation for the test.

I use Fraps to measure performance, and I see no difference between the test with discard commented out and with discard uncommented (22 fps in both cases). Does anybody have an idea what my problem is?

Zengar
05-24-2007, 08:46 AM
I don't understand what you are trying to accomplish... Early z-culling is a feature where a fragment can get discarded before it runs through the fragment shader. To make use of it, you lay out the depth first (render the scene with no color output) and then render the scene a second time. The discard keyword will in many cases show no speed-up, as it just prohibits the fragment from passing; the shader may still have to run to the end.

Olivier B.
05-25-2007, 01:00 AM
To make use of it you lay out the depth first (render the scene with no color output)
--> It's my Early Z-Culling init pass


then render the scene the second time --> It's my Compute pass

I use discard in my fragment shader only to choose whether or not to write the depth buffer. Fragments discarded in the Z pass are the ones that will be computed in the 2nd pass.

ze moo
05-25-2007, 07:20 AM
Originally posted by Olivier B.:
...
I use discard in my frag shader only to write the depth buffer or not. Discarded frag in Z pass means that they will be compute in 2nd pass.

You should disable color writes using glColorMask instead of discarding the fragment inside the shader.

cheers

Olivier B.
05-25-2007, 07:29 AM
I cannot disable color writes because in the real program I need to write color values in this pass.
And I don't think writing color deactivates early Z-culling.

AlexN
05-25-2007, 07:41 AM
Discard and alpha test will typically not work with early Z-cull - any depth values written while alpha test or a shader with discard is active will not contribute to early Z-cull. That said...I've had some luck in the past with getting early stencil reject to work with alpha test. Also, others have found that early Z-cull is less effective with FBOs or large render targets than with the main backbuffer.

Olivier B.
05-25-2007, 07:58 AM
OK, but I found this example (http://www.gpgpu.org/w/index.php/Code_Examples#Early-z) and it uses discard :confused:

Jackis
05-25-2007, 08:29 AM
Maybe discard doesn't work with early-Z on your Quadro, as on the GeForce FX (I mean, it may cause early-Z to break).
Take the working example and change it until it stops working - that way you will find out what causes early-Z to die, and that would be great.

Olivier B.
05-25-2007, 09:13 AM
But when I comment out the discard (i.e. no fragments are processed in the compute pass), I get no performance gain.

Jackis
05-28-2007, 04:04 AM
Okay, so maybe you have ALPHA_TEST enabled; check that it is disabled.

Olivier B.
05-28-2007, 05:01 AM
Thanks for your help, but ALPHA_TEST is indeed disabled.

Jackis
05-28-2007, 08:52 AM
Okay, are you sure you have the same depth renderbuffer for the prepare pass and the render pass? In your code you use different FBOs for them, so maybe they have different depth attachments, and that's bad.
Are you sure that the depth write mask is okay? Your code shows no setup function calls, so it is hard to tell what is executed there and what is not.
You say you have a working example. Maybe you are doing something wrong here compared to that example?
Bonne chance!

Olivier B.
05-28-2007, 09:58 AM
My two FBOs share the same depth buffer. I added an occlusion query to see how many fragments are processed, and the number is correct. I compared my example with the working example, but the only difference is that the working example uses Cg and GLUT whereas mine uses GLSL and SDL. I tried initializing the depth buffer without a shader and changing the color/depth masks, but nothing works.

Olivier B.
05-29-2007, 02:04 AM
I've written a short program that shows every step of how I test early Z culling.


#include <iostream>

#include <SDL.h>
#include <GL/glew.h>
#include <imdebuggl.h>

using namespace std;

GLuint fbo1; // Framebuffer object handle (generated in genFramebuffer, bound in drawScene)
GLuint colorMap1; // Rectangle texture attached to fbo1 as GL_COLOR_ATTACHMENT0_EXT
GLuint depthMap1; // Rectangle texture attached to fbo1 as GL_DEPTH_ATTACHMENT_EXT
GLuint shader1; // Shader program object (created in genShader, used in drawScene)

// Vertex shader source: forwards texture coordinate set 0 and applies the
// fixed-function transform to the vertex position. The literal pieces are
// concatenated into a single line of GLSL.
static const char *vertProg =
    "void main(void) "
    "{ "
    " gl_TexCoord[0] = gl_MultiTexCoord0; "
    " gl_Position = ftransform(); "
    "} ";

// Fragment shader source: writes opaque black with the red channel set to
// 1.0 where the integer part of the s texture coordinate is even and 0.0
// where it is odd; the assignment is repeated 100 times inside a loop.
// Standard GLSL has no C-style casts and (in 1.10/1.20) reserves the %
// operator, so the parity test is written with floor() and mod() instead of
// "(int)x % 2".
static const char *fragProg = {
"void main(void) "
"{ "
" vec4 outputVal = vec4(0.0, 0.0, 0.0, 1.0); "
" for(int i = 0; i < 100; i++) "
" { "
" outputVal.r = (mod(floor(gl_TexCoord[0].s), 2.0) == 0.0) ? 1.0 : 0.0; "
" } "
" gl_FragColor = outputVal; "
"} "
};

void drawScene(int scrWidth, int scrHeight, int fboWidth, int fboHeight)
{
static bool halfRender = true, dbgResult = true;

// Bind FBO
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, fbo1);

glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);

glViewport(0, 0, fboWidth, fboHeight);

glMatrixMode(GL_PROJECTION);
glPushMatrix();
glLoadIdentity();

glOrtho(-0.5, 0.5, -0.5, 0.5, 0.1, 1024.0);

glMatrixMode(GL_MODELVIEW);
glPushMatrix();
glLoadIdentity();

gluLookAt(0.5, 1.0, 0.5,
0.5, 0.0, 0.5,
0.0, 0.0, 1.0);

glEnable(GL_DEPTH_TEST);

// If Z buffer will be init for early Z culling
if(halfRender)
{
glColorMask(false, false, false, false);
glDepthMask(true);

glBegin(GL_QUADS);
glVertex3f(0.0f, 0.0f, 0.0f);
glVertex3f(0.0f, 0.0f, 0.5f);
glVertex3f(1.0f, 0.0f, 0.5f);
glVertex3f(1.0f, 0.0f, 0.0f);
glEnd();
}

glColorMask(true, true, true, true);
glDepthMask(false);

// Bind Shader
glUseProgramObjectARB(shader1);

glBegin(GL_QUADS);
glTexCoord2i(fboWidth, 0);
glVertex3f(0.0f, 0.0f, 0.0f);

glTexCoord2i(fboWidth, fboHeight);
glVertex3f(0.0f, 0.0f, 1.0f);

glTexCoord2i(0, fboHeight);
glVertex3f(1.0f, 0.0f, 1.0f);

glTexCoord2i(0, 0);
glVertex3f(1.0f, 0.0f, 0.0f);
glEnd();

// Unbind Shader
glUseProgramObjectARB(0);

glMatrixMode(GL_PROJECTION);
glPopMatrix();

glMatrixMode(GL_MODELVIEW);
glPopMatrix();

// Unbind FBO
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, 0);

if(dbgResult)
{
float *tmpD = new float[fboWidth * fboHeight * 4];
glBindTexture(GL_TEXTURE_RECTANGLE_NV, colorMap1);
glGetTexImage(GL_TEXTURE_RECTANGLE_NV, 0, GL_RGBA, GL_FLOAT, tmpD);
imdebug("rgba b=32f w=%d h=%d %p", fboWidth, fboHeight, tmpD);
dbgResult = false;
delete[] tmpD;
}

SDL_GL_SwapBuffers();
}

void genFramebuffer(int width, int height)
{
// Init Color Map
glGenTextures(1, &amp;colorMap1);
glBindTexture(GL_TEXTURE_RECTANGLE_NV, colorMap1);
glTexParameteri(GL_TEXTURE_RECTANGLE_NV, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_RECTANGLE_NV, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_RECTANGLE_NV, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_RECTANGLE_NV, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glTexImage2D(GL_TEXTURE_RECTANGLE_NV, 0, GL_FLOAT_RGBA32_NV, width, height, 0, GL_RGB, GL_FLOAT, NULL);

// Init Depth Map
glGenTextures(1, &amp;depthMap1);
glBindTexture(GL_TEXTURE_RECTANGLE_NV, depthMap1);
glTexParameteri(GL_TEXTURE_RECTANGLE_NV, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_RECTANGLE_NV, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_RECTANGLE_NV, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_RECTANGLE_NV, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glTexImage2D(GL_TEXTURE_RECTANGLE_NV, 0, GL_DEPTH_COMPONENT24, width, height, 0, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT, NULL);

// Init FBO
glGenFramebuffersEXT(1, &amp;fbo1);

// Textures Attachement
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, fbo1);
glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, GL_TEXTURE_RECTANGLE_NV, colorMap1, 0);
glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_DEPTH_ATTACHMENT_EXT, GL_TEXTURE_RECTANGLE_NV, depthMap1, 0);

if(glCheckFramebufferStatusEXT(GL_FRAMEBUFFER_EXT) != GL_FRAMEBUFFER_COMPLETE_EXT)
{
cout << "FBO is not COMPLETE" << endl;
}

glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, 0);
}

void genShader()
{
GLint compilResult;

// Get a shader program object id
shader1 = glCreateProgramObjectARB();

// Get handle to vertex shader
GLhandleARB vertHandle = glCreateShaderObjectARB(GL_VERTEX_SHADER);

// Sends vertex shader source to OpenGL
glShaderSource(vertHandle, 1, &amp;vertProg, 0);

// Vertex shader compilation
glCompileShader(vertHandle);

// Link vertex shader program with shader program object
glAttachObjectARB(shader1, vertHandle);

// Get vertex shader compilation log in case of error
glGetObjectParameterivARB(vertHandle, GL_OBJECT_COMPILE_STATUS_ARB, &amp;compilResult);
if(compilResult == GL_FALSE)
{
char temp[4096];
glGetInfoLogARB(vertHandle, 4096, NULL, temp);
cout << "Vertex shader : " << temp << endl;
}

// Delete vertex shader handle
glDeleteObjectARB(vertHandle);

// Get handle to frag shader
GLhandleARB fragHandle = glCreateShaderObjectARB(GL_FRAGMENT_SHADER);

// Sends frag shader source to OpenGL
glShaderSource(fragHandle, 1, &amp;fragProg, 0);

// Vertex frag compilation
glCompileShader(fragHandle);

// Link frag shader program with shader program object
glAttachObjectARB(shader1, fragHandle);

// Get frag shader compilation log in case of error
glGetObjectParameterivARB(fragHandle, GL_OBJECT_COMPILE_STATUS_ARB, &amp;compilResult);
if (compilResult == GL_FALSE)
{
char temp[4096];
glGetInfoLogARB(fragHandle, 4096, NULL, temp);
cout << "Frag shader : " << temp << endl;
}

// Delete frag shader handle
glDeleteObjectARB(fragHandle);

// Link shader programs
glLinkProgram(shader1);
}

void delFramebuffer()
{
glDeleteTextures(1, &amp;colorMap1);
glDeleteTextures(1, &amp;depthMap1);
glDeleteFramebuffersEXT(1, &amp;fbo1);
}

void delShader()
{
}

int main(int argc, char* argv[])
{
bool finish = false; // Indicate program end
int scrWidth = 512; // Screen width resolution
int scrHeight = 512; // Screen height resolution
int fboWidth = 512; // Framebuffer width resolution
int fboHeight = 512; // Framebuffer height resolution
int scrBpp = 32; // Screen bits per pixel
SDL_Surface *screen; // Video buffer to display

// SDL video init
if(SDL_Init(SDL_INIT_VIDEO) < 0)
{
cout << "Unable to init SDL : " << SDL_GetError() << endl;
return 1;
}

// Program exit function
atexit(SDL_Quit);

SDL_GL_SetAttribute(SDL_GL_RED_SIZE, 5);
SDL_GL_SetAttribute(SDL_GL_GREEN_SIZE, 5);
SDL_GL_SetAttribute(SDL_GL_BLUE_SIZE, 5);
SDL_GL_SetAttribute(SDL_GL_DEPTH_SIZE, 16);
SDL_GL_SetAttribute(SDL_GL_DOUBLEBUFFER, 1);

// Video mode init
screen = SDL_SetVideoMode(scrWidth, scrHeight, scrBpp, SDL_OPENGL/*|SDL_FULLSCREEN*/);

// Video mode error
if(screen == NULL)
{
cout << "Unable to set " << scrWidth << "x" << scrHeight << "x" << scrBpp
<< " video mode : " << SDL_GetError() << endl;
return 1;
}

GLenum err = glewInit();

if(err != GLEW_OK)
{
cout << "Unable to start glew : " << glewGetErrorString(err) << endl;
return 1;
}

genFramebuffer(fboWidth, fboHeight);
genShader();

// Program loop
while(!finish)
{
SDL_Event event;

while(SDL_PollEvent(&amp;event))
{
if(event.type == SDL_QUIT)
finish = true;

if(event.type == SDL_KEYDOWN)
{
if(event.key.keysym.sym == SDLK_ESCAPE)
finish = true;
}
}

drawScene(scrWidth, scrHeight, fboWidth, fboHeight);
}

delShader();
delFramebuffer();

return 0;
}In this program early Z don't work too.

Ysaneya
05-29-2007, 03:00 AM
I was under the impression that early Z didn't work with FBOs?!

Y.

Olivier B.
05-29-2007, 03:08 AM
According to this test (http://fytos.com/uploads/gpu/zbufferExampleFBO.zip), early Z works with FBOs.
I'm looking into including the same shader, but written in Cg, in my test. I think the problem comes from GLSL.

Jackis
05-29-2007, 03:11 AM
Hmmm, rather strange. By the way, your situation is a very simple one in which to see gains from early-Z.

I noticed that you create your back buffer with 16-bit color and depth, and a framebuffer object with 32-bit color and 24-bit depth.
Some time ago (about a year) it was quite hard to get a 24-bit FBO depth buffer when the screen framebuffer had 16-bit depth. I hope it is no longer a problem now.

Try this demo and see, whether your card can have benefits from Z-Cull: http://humus.ca/index.php?page=3D&ID=3