Shadow Volumes FPS cost

I’ve been testing frame rates with my program and I see that using shadow volumes slows it down a lot. Here is my code for the shadow volume calculation (I think there must be a way to improve the speed, maybe by reducing the number of calls to OpenGL functions).

FPS:

  • Using VBO for objects & immediate mode for shadow volumes: 120FPS
  • Using VBO for objects & no shadow volumes: 240FPS
  • Using immediate mode for objects and shadow volumes: 40FPS
  • Using immediate mode for objects & no shadow volumes: 50FPS

As you can see, the shadow volume code slows things down a lot.

Here is the code:


/*
 * Render the stencil shadow cast by every polygon of `obj`.
 *
 * For each polygon the shadow volume is drawn twice with colour and depth
 * writes disabled: once with front faces culled (stencil incremented where
 * the depth test fails) and once with back faces culled (stencil decremented
 * where the depth test fails).  After all volumes have been accumulated,
 * RS_Draw_Shadow() is called once with the stencil test set to "not equal
 * to 0", so the darkening quad only touches pixels left with a non-zero
 * count.
 *
 * Changes compared with the previous per-polygon version:
 *   - State that is identical for every polygon (polygon offset, colour and
 *     depth masks, stencil function, cull enable) is set once outside the
 *     loop instead of being re-issued for each polygon.
 *   - The full-screen shadow quad is drawn once per object rather than once
 *     per polygon, so pixels covered by several polygons' shadows are
 *     darkened a single time instead of once per overlapping volume.
 *
 * The stencil buffer is expected to hold 0 on entry.  The final pass uses
 * GL_REPLACE with reference 0 for all three stencil outcomes, which writes 0
 * back for the fragments of the quad.
 *
 * NOTE(review): the depth-fail counting only works with GL_DEPTH_TEST
 * enabled by the caller -- this function does not enable it itself.
 *
 * obj: object whose `poligono[0 .. p_total-1]` entries supply the vertex
 *      indices (a, b, c) passed to RS_Render_Shadow_Volume().
 */
void RS_Render_Obj_Shadow(RS_objeto *obj)
{
	int i;

	glEnable(GL_STENCIL_TEST);

	if (obj->p_total > 0) {
		/* Stencil-only pass: no colour or depth writes, stencil test always passes. */
		glEnable(GL_POLYGON_OFFSET_FILL);
		glPolygonOffset(0.0f, 100.0f);
		glEnable(GL_CULL_FACE);
		glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
		glDepthMask(GL_FALSE);
		glStencilFunc(GL_ALWAYS, 0x0, 0xff);

		for (i = 0; i < obj->p_total; i++) {
			/* Back faces of the volume: count up on depth fail. */
			glCullFace(GL_FRONT);
			glStencilOp(GL_KEEP, GL_INCR, GL_KEEP);
			RS_Render_Shadow_Volume(obj, obj->poligono[i].a, obj->poligono[i].b, obj->poligono[i].c);

			/*
			 * Front faces of the same volume: count down on depth fail.
			 * Increment and decrement stay interleaved per polygon, as
			 * before, so the running count is built up in the same order.
			 */
			glCullFace(GL_BACK);
			glStencilOp(GL_KEEP, GL_DECR, GL_KEEP);
			RS_Render_Shadow_Volume(obj, obj->poligono[i].a, obj->poligono[i].b, obj->poligono[i].c);
		}

		/* Restore normal rendering state before drawing the visible shadow. */
		glDisable(GL_POLYGON_OFFSET_FILL);
		glDisable(GL_CULL_FACE);
		glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
		glDepthMask(GL_TRUE);

		/* Darken only where the accumulated stencil count is non-zero. */
		glStencilFunc(GL_NOTEQUAL, 0x0, 0xff);
		glStencilOp(GL_REPLACE, GL_REPLACE, GL_REPLACE);
		RS_Draw_Shadow();
	}

	glDisable(GL_STENCIL_TEST);
}

/*
 * Draw the closed shadow volume of one triangle of `obj` in immediate mode.
 *
 * The three triangle vertices form the near cap.  Each one is pushed away
 * from the light along the normalised light-to-vertex direction, scaled by
 * M_INFINITY, to form the far cap.  A quad strip joins the two caps.
 *
 * obj:     object supplying the `vertice` array.
 * a, b, c: 1-based vertex indices of the triangle (the code reads
 *          vertice[a-1], vertice[b-1] and vertice[c-1]).
 *
 * Changes compared with the previous version:
 *   - The extruded vertices are selected through an explicit index table.
 *     The old "if (i == a-1) ... else if (i == b-1)" chain never reached
 *     vertex c when a == b, so the far cap disagreed with the near cap.
 *   - The unused local `lenght` and the dead trailing "i = a-1" are gone.
 *   - Both caps are sent in a single glBegin(GL_TRIANGLES) batch.
 *
 * NOTE(review): the transform and the light position are taken from the
 * globals `ball` and `luz0`, not from `obj` -- confirm that is intended.
 * NOTE(review): each RS_Rotate?_Vect call below reads luz0.pos and writes
 * light_pos.  If those helpers take (dst, src, angle), only the last (Z)
 * rotation survives and the X/Y results are overwritten -- verify against
 * the helper definitions before relying on the light position.
 */
void RS_Render_Shadow_Volume(RS_objeto *obj,int a, int b, int c)
{
     const int idx[3] = { a - 1, b - 1, c - 1 };
     float cap_near[3][3];   /* triangle vertices as stored in obj */
     float cap_far[3][3];    /* the same vertices extruded away from the light */
     float light_pos[3];
     int k;

     glPushMatrix();
     RS_Move_Obj(ball.pos[0],ball.pos[1],ball.pos[2]);
     RS_Rotate_Obj(&ball);
     RS_Rotatex_Vect(light_pos,luz0.pos,-ball.rot[X]);
     RS_Rotatey_Vect(light_pos,luz0.pos,-ball.rot[Y]);
     RS_Rotatez_Vect(light_pos,luz0.pos,-ball.rot[Z]);

     for (k = 0; k < 3; k++) {
          cap_near[k][0] = obj->vertice[idx[k]].x;
          cap_near[k][1] = obj->vertice[idx[k]].y;
          cap_near[k][2] = obj->vertice[idx[k]].z;

          /* Direction from the light to the vertex ... */
          cap_far[k][0] = (obj->vertice[idx[k]].x - light_pos[0]);
          cap_far[k][1] = (obj->vertice[idx[k]].y - light_pos[1]);
          cap_far[k][2] = (obj->vertice[idx[k]].z - light_pos[2]);
          RS_Normalize(cap_far[k]);

          /* ... stretched to the extrusion distance and re-based at the light. */
          cap_far[k][0] *= M_INFINITY;
          cap_far[k][1] *= M_INFINITY;
          cap_far[k][2] *= M_INFINITY;
          cap_far[k][0] += light_pos[0];
          cap_far[k][1] += light_pos[1];
          cap_far[k][2] += light_pos[2];
     }

     glBegin(GL_TRIANGLES);
     /* back cap: reversed vertex order relative to the front cap */
     glVertex3fv(cap_far[2]);
     glVertex3fv(cap_far[1]);
     glVertex3fv(cap_far[0]);
     /* front cap */
     glVertex3fv(cap_near[0]);
     glVertex3fv(cap_near[1]);
     glVertex3fv(cap_near[2]);
     glEnd();

     /* Sides: the strip returns to the first vertex pair to close the volume. */
     glBegin(GL_QUAD_STRIP);
     glVertex3fv(cap_near[0]);
     glVertex3fv(cap_far[0]);
     glVertex3fv(cap_near[1]);
     glVertex3fv(cap_far[1]);
     glVertex3fv(cap_near[2]);
     glVertex3fv(cap_far[2]);
     glVertex3fv(cap_near[0]);
     glVertex3fv(cap_far[0]);
     glEnd();

     glPopMatrix();
}

/*
 * Draw a translucent black quad over the whole viewport.
 *
 * The quad spans (0,0)-(1,1) under a temporary glOrtho(0, 1, 1, 0, 0, 1)
 * projection with an identity modelview, so it covers the full viewport.
 * It is blended with colour (0, 0, 0, 0.85) and drawn with the depth test
 * off.  Which pixels actually get darkened is decided by whatever stencil
 * state the caller has set up; this function does not touch the stencil
 * configuration.
 *
 * The modelview stack is now selected explicitly before the first push.
 * Previously that push acted on whichever matrix stack happened to be
 * current, while the matching pop was always issued on GL_MODELVIEW, so the
 * two could mismatch if the caller had left another matrix mode active.
 *
 * Side effects on GL state at return: matrix mode is GL_MODELVIEW, GL_BLEND
 * is disabled, GL_DEPTH_TEST is enabled, and the blend function and current
 * colour keep the values set here.  Both matrix stacks are restored.
 */
void RS_Draw_Shadow()
{
	/* Save the modelview matrix and reset it to identity. */
	glMatrixMode(GL_MODELVIEW);
	glPushMatrix();
	glLoadIdentity();

	/* Save the projection matrix and switch to a unit-square orthographic one. */
	glMatrixMode(GL_PROJECTION);
	glPushMatrix();
	glLoadIdentity();
	glOrtho(0, 1, 1, 0, 0, 1);

	glDisable(GL_DEPTH_TEST);
	glEnable(GL_BLEND);
	glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
	glColor4f(0.0f, 0.0f, 0.0f, 0.85f);

	glBegin(GL_QUADS);
		glVertex2i(0, 0);
		glVertex2i(0, 1);
		glVertex2i(1, 1);
		glVertex2i(1, 0);
	glEnd();

	glDisable(GL_BLEND);
	glEnable(GL_DEPTH_TEST);

	/* Restore projection first (it is still the current stack), then modelview. */
	glPopMatrix();
	glMatrixMode(GL_MODELVIEW);
	glPopMatrix();
}

I’d be grateful if someone could help me improve/optimize this code.
Thanks in advance

oh, this is a big issue. i don’t think just a thread could solve it.

so just suggestions :

  1. immediate mode is always a bad choice, so cache one buffer for the shadow vertices and another for the indices, generate them when needed, and use client arrays or a VBO to render the shadow volume.

calculating and generating the shadow buffers is the main aspect for you to optimize.

  1. there are the functions ‘glStencilOpSeparate’ and ‘glStencilFuncSeparate’ (core since OpenGL 2.0). With them you can draw the shadow volume only once (they let front faces and back faces affect the stencil buffer in different ways)

  2. set color mask 0 when you fill the depth buffer
    set color mask and depth mask 0 when you fill the stencil buffer

A lot of optimizations are needed:
VBO, geometry shaders, volume caching, clipping, scissoring,
low-poly or hull models, early stencil rejection etc.

Thanks to both. so maybe to start:

  1. using the silhouette edge instead of rendering a shadow volume for each polygon
  2. using a VBO for drawing the shadow volume instead of immediate mode
    Then I will go on with other improvements.

Thanks =)

In general, I’d highly recommend that you determine where you are bottlenecked before you go optimizing. As a first start, shrink your window. Does your frame rate improve? Then probably fill bound. Work your way up the pipe from there.

Not using immediate mode for anything is worth it though – go ahead and do that.

100x100 = 120FPS
640x480 = 120FPS
800x600 = 100FPS
1024x768= 70FPS

I will start by eliminating immediate mode from my program. Then I’ll check which aspects can be optimized.

Thanks