Geometry shader based CSM

I have implement 3 split cascaded shadow map , using VSM by store linear z and z^2 as moment in GL_RG32F texture.

while the result look good the performance are not.

The frame rate have been reduced from 97 fps (single shadow map) to 50 fps.

Reading the tutorial here.
http://http.developer.nvidia.com/GPUGems3/gpugems3_ch10.html

suggest that in DX10 you can use geometry shader to output different transformed geometry to texture array.

Is this method also possible on OpenGL 3.2/3.3 ?
and if this is possible what kind of reference/spec should i look into.

I have to admit that I never try geometry shader and texture array before (My current implement using one FBO with 3 attached GL_RG32F and a shared depth buffer that have to be clear every time I render a splitted).

Thank in advance.

You can do it with GL-3.2.
Open the official specification and read sections about geometry shader & texture arrays. Post questions here if your encounter difficulties.

I am now succesfully use texture array to store each slice of shadow map (still have’t tried geometry cloning) and able to see some improvement in framerate.

But using texture array introduced some design problem.

I have some ping pong blur shader that expect simple texture2d as an input.

Are there anyway can I bind each face of the array as simple texture2d
so that I can continue using those blur shader or I have to write a specified blur shader that take texture array and slice index ?

The answer is damn simple: glFramebufferTextureLayer

@somboon: you will have to provide the Z coordinate, as a uniform or vtx-attrib.

thank you Ilian Dinev :slight_smile:

I decided to implement a version of post processing shaders that use sampler2DArray with slice index pass as uniform and it still run as fast as the simple sampler2D one.

using texture array instead of individual texture give some performance boot (around 7 fps) and it very easy to support up to 4 shadow map split in same shader.

Now time to get messy with these geometry cloning thing (but my gut feelings tell me that I not gonna get a lot of performance boost because of the fragment limit).

I tried to use layer rendering to create shadow map but it gave me some problem.

The following geometry shader will only render the last layer (layer 3,gl_Layer = 2) correct.


#version 150 core

precision highp int;

layout(triangles) in;
layout(triangle_strip, max_vertices = 9) out;

uniform mat4 cameraMatrix[3];
uniform mat4 modelMatrix;
uniform mat4 projectionMatrix[3];

out vec4 vertexPosition;

void main()
{
	
	for(int layerIndex = 0; layerIndex < 3;layerIndex++)
	{
	
		gl_Layer = layerIndex;
		
		mat4 cameraModelMatrix = cameraMatrix[layerIndex]*modelMatrix;
		
		for(int i = 0; i < gl_in.length();i++)
		{
			gl_Position = projectionMatrix[layerIndex]*cameraModelMatrix*gl_in[i].gl_Position;
			vertexPosition = cameraModelMatrix*gl_in[i].gl_Position;
			EmitVertex();
		}
		EndPrimitive();
			
	}

	
}

if I change “layerIndex < 3” to “layerIndex < 2” only the second layer will be correctly rendered.

any layer written before the last layer get mangled like it had been over written with some garbage.

I also try to unroll the layer loop and using integer to specified layer/uniform matrix index but it didn’t help.

All the uniform matrix are uploaded correctly(since with testing above the last layer will alway correctly render).

The layered FBO are create with the code below with out any error


void FBO::initFBOForVarianceShadowMapLayered(int width,int height,int shadowCount){

	this->finalizeFBO();

	
	this->width = width;
	this->height = height;

	glGenFramebuffers(1, &fboID);
	glBindFramebuffer(GL_FRAMEBUFFER, fboID);

	//create and attach depth array
	glGenTextures(1, &this->depthBufferTexID);

	glBindTexture(GL_TEXTURE_2D_ARRAY, this->depthBufferTexID);
	glTexImage3D(GL_TEXTURE_2D_ARRAY, 0, GL_DEPTH_COMPONENT24, this->width,this->height,shadowCount, 0, GL_DEPTH_COMPONENT, GL_UNSIGNED_BYTE, NULL);
	glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
	glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
	glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
	glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);

	glFramebufferTexture(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT,depthBufferTexID,0);


	//create texture array of color buffer
	this->colorBufferTexID.resize(1);
	glGenTextures(1, &this->colorBufferTexID[0]);

	glBindTexture(GL_TEXTURE_2D_ARRAY, this->colorBufferTexID[0]);
	glTexImage3D(GL_TEXTURE_2D_ARRAY, 0, GL_RG32F, this->width,this->height,shadowCount, 0, GL_RG, GL_FLOAT, NULL);
	glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
	glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
	glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
	glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);

	glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,this->colorBufferTexID[0],0);

	GLenum status = glCheckFramebufferStatus(GL_FRAMEBUFFER);

	if(status == GL_FRAMEBUFFER_COMPLETE){
		printf("Create layered frame buffert succssful
");
	}

	glBindFramebuffer(GL_FRAMEBUFFER, 0);

	this->isFBOInitialize = true;

	getchar();
	
}

and have a shader compile with out error or warning

**** UPDATE ****

with out depth texture attach the layer rendering are correct (But with out any depth testing).

So the error might be where I attach depth buffer to FBO or I didn’t clear the depth buffer right.

I just call this code to clear selected draw buffer (buffer 0)


//activate FBO
glBindFramebuffer(GL_FRAMEBUFFER, fboID);
glViewport(0,0,width, height);
//select draw buffer
glDrawBuffer(GL_COLOR_ATTACHMENT0);

glEnable(GL_DEPTH_TEST);
glDepthFunc(GL_LEQUAL);
glClear(GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT);

If you need more info just ask me.

Can you repeat the experiment, but unroll the loop of layerIndex by hand? Try getting the layered rendering working by simplifying your code step by step.

Sorry to bring out such an old post.

I decided to try layer rendering again with loop unrolling as suggested by DmitryM.

but the result on my ATI HD4670 are the same. only the last render layer get the correct result. Other layer render before will get over written with garbage.

I also tried the code on my brother NVIDIA GTS250 and he said it give the correct result (but the performance seem to be worst than multi pass method).

this is the unrolled shader (The rest of the setup code are the same)

vertex shader


#version 150 core

in vec3 vertex;

void main()
{	
	//send this to geometry shader	
	gl_Position = vec4(vertex.xyz,1.0);						
} 

geometry shader


#version 150 core

precision highp int;

layout(triangles) in;
layout(triangle_strip, max_vertices = 9) out;

uniform mat4 cameraMatrix[3];
uniform mat4 modelMatrix;
uniform mat4 projectionMatrix[3];

out vec4 vertexPosition;

void main()
{
	
		
	mat4 cameraModelMatrix;
	mat4 tfMatrix;

	//layer0
	gl_Layer = 0;
	cameraModelMatrix = cameraMatrix[0]*modelMatrix;
	tfMatrix = projectionMatrix[0]*cameraModelMatrix;
	for(int i=0 ; i<3 ; i++)
	{
		gl_Position = tfMatrix*gl_in[i].gl_Position;
		vertexPosition = cameraModelMatrix*gl_in[i].gl_Position;
		EmitVertex();
	}
	EndPrimitive();

	//layer1
	gl_Layer = 1;
	cameraModelMatrix = cameraMatrix[1]*modelMatrix;
	tfMatrix = projectionMatrix[1]*cameraModelMatrix;
	for(int i=0 ; i<3 ; i++)
	{
		gl_Position = tfMatrix*gl_in[i].gl_Position;
		vertexPosition = cameraModelMatrix*gl_in[i].gl_Position;
		EmitVertex();
	}
	EndPrimitive();

	//layer2
	gl_Layer = 2;
	cameraModelMatrix = cameraMatrix[2]*modelMatrix;
	tfMatrix = projectionMatrix[2]*cameraModelMatrix;
	for(int i=0 ; i<3 ; i++)
	{
		gl_Position = tfMatrix*gl_in[i].gl_Position;
		vertexPosition = cameraModelMatrix*gl_in[i].gl_Position;
		EmitVertex();
	}
	EndPrimitive();

}

fragment shader


#version 150 core
precision highp float;

in vec4 vertexPosition;
out vec4 outColor;

void main()
{

	
	float depth = length(vertexPosition) ;
	
	 
	float momentsx = depth;  
	float dx = dFdx(depth);  
	float dy = dFdy(depth);  
	float momentsy = depth*depth + 0.25*(dx*dx + dy*dy);  		
	
	outColor = vec4(momentsx,momentsy,0.0,1.0);
		
}

I tried using a geometry shader to render to a layered framebuffer on a ATI HD4350(PC was running x64 Ubuntu) and had a similiar result i.e. Only one layer was rendered correctly, the rest were garbage.

[QUOTE=somboon;1206506]I am now succesfully use texture array to store each slice of shadow map (still have’t tried geometry cloning) and able to see some improvement in framerate.

But using texture array introduced some design problem.

I have some ping pong blur shader that expect simple texture2d as an input.

Are there anyway can I bind each face of the array as simple texture2d
so that I can continue using those blur shader or I have to write a specified blur shader that take texture array and slice index ?[/QUOTE]

@somboon
Hi, I know that this post is a bit old, but I have a similar problem. I’m trying to generate each shadowmap on one layer of TEXTURE_2D_ARRAY, but i have some difficulties ! maybe can you help me about that ? To generate shadowmaps, i proceed like that :

void initShadows(GLAutoDrawable drawable)
{
GL2 gl = (GL2)drawable.getGL();

        shadow_tex = Buffers.newDirectIntBuffer(1);
        gl.glGenTextures(1, shadow_tex);

        this.shadow_fbo = Buffers.newDirectIntBuffer(1);
        gl.glGenFramebuffers(1, shadow_fbo);


        gl.glActiveTexture(GL2.GL_TEXTURE2);

        gl.glGenTextures(1, shadow_tex);
        gl.glBindTexture(GL2.GL_TEXTURE_2D_ARRAY, shadow_tex.get(0));
        gl.glTexParameteri(GL2.GL_TEXTURE_2D_ARRAY, GL2.GL_TEXTURE_MIN_FILTER, GL2.GL_NEAREST);
        gl.glTexParameteri(GL2.GL_TEXTURE_2D_ARRAY, GL2.GL_TEXTURE_MAG_FILTER, GL2.GL_NEAREST);
        gl.glTexParameteri(GL2.GL_TEXTURE_2D_ARRAY, GL2.GL_TEXTURE_WRAP_S, GL2.GL_CLAMP_TO_BORDER );
        gl.glTexParameteri(GL2.GL_TEXTURE_2D_ARRAY, GL2.GL_TEXTURE_WRAP_T, GL2.GL_CLAMP_TO_BORDER );
        gl.glTexParameteri(GL2.GL_TEXTURE_2D_ARRAY, GL2.GL_TEXTURE_COMPARE_MODE, GL2.GL_COMPARE_REF_TO_TEXTURE);
        gl.glTexParameteri(GL2.GL_TEXTURE_2D_ARRAY, GL2.GL_TEXTURE_COMPARE_FUNC, GL2.GL_LESS);

        gl.glTexParameterfv(GL2.GL_TEXTURE_2D_ARRAY, GL2.GL_TEXTURE_BORDER_COLOR, white, 0);

        gl.glTexImage3D(GL2.GL_TEXTURE_2D_ARRAY, 0, GL2.GL_DEPTH_COMPONENT32, (int)shadowTexSizeX, (int)shadowTexSizeY, (int)spotLights.size(), 0, GL2.GL_DEPTH_COMPONENT, GL2.GL_UNSIGNED_BYTE, null);
        gl.glBindTexture(GL2.GL_TEXTURE_2D_ARRAY, 0);
        gl.glBindFramebuffer(GL2.GL_DRAW_FRAMEBUFFER, this.shadow_fbo.get(0));

        gl.glFramebufferTextureLayer(GL2.GL_DRAW_FRAMEBUFFER, GL2.GL_DEPTH_ATTACHMENT, this.shadow_tex.get(0), 0, 0);

    }

and to draw on the light point of view, on each layer of the texture :

gl.glBindFramebuffer(GL2.GL_DRAW_FRAMEBUFFER, shadow_fbo.get(0));
gl.glClear(GL2.GL_DEPTH_BUFFER_BIT);

        gl.glColorMask(false, false, false, false);
        gl.glDisable(GL2.GL_BLEND);
        
        glFw.pushProjectionMatrix();
        glFw.pushViewMatrix();
        
        for(int i=0; i&lt;spotLights.size(); i++)
        {
            gl.glViewport(0, 0, (int)shadowTexSizeX, (int)shadowTexSizeY);
            //last i parameter for the layer of the texture array
            gl.glFramebufferTextureLayer(GL2.GL_DRAW_FRAMEBUFFER, GL2.GL_DEPTH_ATTACHMENT, this.shadow_tex.get(0), 0, i);
           .....
           //set point of view for each light and draw scene

and, my fragment shader :

uniform sampler2DArrayShadow texShadow;
vec4 shadowCoordinateWdivide = shadowCoord / shadowCoord.w;
float shadow = texture(texShadow, vec4(shadowCoordinateWdivide.xyz, i));
… light calculation ponderate by shadow

is it something wrong ? please help me !