Hello,

A while back I posted about some deferred shading performance problems, and how using stencil light volumes actually slowed it down instead of speeding it up.
This has probably been thought of before, but I thought I would share it here anyway in case it hasn't.
I managed to batch the light stencil tests into groups of 8 (one light per stencil bit). While depth testing the front faces of the light volumes, I use glStencilMask so that each light writes only its own bit; this acts as an OR operation that records which lights affect which pixels.
In a second pass over the back faces of the light volumes, I switch the depth test to GL_GREATER and set the stencil func so that a light is rendered only where its bit was set earlier, using the mask parameter (which is ANDed with both the reference value and the stored stencil value).

With this system, there is no overdraw, and the stencil test is fast. Overall, the system is faster than without the stenciling.

This is what the light rendering looks like:

Code :
// ---------------------------- Render Lights ----------------------------
 
 
    // Query visible lights
    std::vector<OctreeOccupant*> result;
 
 
    m_lightSPT.Query_Frustum(result, pScene->GetFrustum());
 
 
    glEnable(GL_VERTEX_ARRAY);
 
 
    glEnable(GL_STENCIL_TEST);
 
 
    glClearStencil(0);
 
 
    for(unsigned int i = 0, size = result.size(); i < size;)
    {
        glClear(GL_STENCIL_BUFFER_BIT);
 
 
        glStencilOp(GL_KEEP, GL_KEEP, GL_REPLACE);
 
 
        glColorMask(false, false, false, false);
 
 
        // Batch 8 lights together
        unsigned int firstLightIndex = i;
 
 
        for(unsigned int j = 0; j < 8 && i < size; j++, i++)
        {
            glStencilFunc(GL_ALWAYS, 0xff, 0xff);
            glStencilMask(m_lightIndices[j]);
 
 
            Light* pLight = static_cast<Light*>(result[i]);
 
 
            if(!pLight->m_enabled)
                continue;
 
 
            pLight->SetTransform(pScene);
            pLight->RenderBoundingGeom();
        }
 
 
        i = firstLightIndex;
 
 
        glColorMask(true, true, true, true);
 
 
        // Now render with reversed depth testing and only to stenciled regions
        glCullFace(GL_FRONT);
 
 
        glDepthFunc(GL_GREATER);
 
 
        glEnable(GL_BLEND);
 
 
        glStencilOp(GL_KEEP, GL_KEEP, GL_KEEP);
 
 
        for(unsigned int j = 0; j < 8 && i < size; j++, i++)
        {
            glStencilFunc(GL_EQUAL, 0xff, m_lightIndices[j]);
 
 
            Light* pLight = static_cast<Light*>(result[i]);
 
 
            if(!pLight->m_enabled)
                continue;
 
 
            // If camera is inside light, do not perform depth test (would cull it away improperly)
            if(pLight->Intersects(pScene->m_camera.m_position))
            {
                glDisable(GL_STENCIL_TEST);
 
 
                pLight->SetTransform(pScene);
 
 
                pLight->SetShader(pScene);
 
 
                pLight->RenderBoundingGeom();
 
 
                glEnable(GL_STENCIL_TEST);
            }
            else
            {
                pLight->SetTransform(pScene);
 
 
                pLight->SetShader(pScene);
 
 
                pLight->RenderBoundingGeom();
            }
        }
 
 
        glCullFace(GL_BACK);
 
 
        glDepthFunc(GL_LESS);
 
 
        glDisable(GL_BLEND);
 
 
        Shader::Unbind();
    }
 
 
    // Re-enable stencil writes to all bits
    glStencilMask(0xff);
 
 
    glDisable(GL_VERTEX_ARRAY);
 
 
    glDisable(GL_STENCIL_TEST);
 
 
    GL_ERROR_CHECK();