Simple_soft_shadows

There is an NVIDIA demo called simple_soft_shadows in their SDK 9, and you can download the individual demo if you want.
It includes this fragment shader for NV40.
Has anyone ever converted it to GLSL?


!!ARBfp1.0
#
# NV4X shader 
# 8 estimation samples
# 64 total samples
#
# Jittered soft-shadow fragment program. It first takes 8 jittered shadow-map
# taps; only if those disagree (the fragment is on a shadow edge) and the
# surface faces the light does it take the remaining 56 taps. The averaged
# shadow term then modulates a diffuse + specular lighting result.
#
# Texture units as used below:
#   texture[0]  2D  decal (colour) map, addressed by texcoord[0]
#   texture[1]  2D  shadow map, addressed projectively from texcoord[1]
#   texture[2]  3D  jitter map; each texel holds two 2D offsets (xy and zw)
#   texture[3]  2D  projected (spot) texture, addressed by texcoord[1]
# Interpolants: texcoord[2] = normal, texcoord[3] = light vector,
#               texcoord[4] = view vector (all renormalised below).
#
# Instruction suffixes: H = 16-bit half, X = 12-bit fixed point,
# C = also update the condition code, _SAT = clamp the result to [0,1].
#

OPTION NV_fragment_program2;


# program.local[0].w is the filter size; program.local[1] scales window
# position into jitter-map coordinates.
PARAM	filtersize = program.local[0];
PARAM	jxyscale = program.local[1];
PARAM	oneover8 = 0.125;
PARAM	oneover64 = 0.015625;

# smcoord stays full precision because it is a texture coordinate.
TEMP	smcoord;

# Everything else is declared SHORT (reduced precision).
# Note: `half` is declared here but never referenced by the program.
SHORT TEMP	fsize, jcoord, shadow, jitter, c;
SHORT TEMP	normal, lightv, view, half, diffuse, refl, spec;
SHORT TEMP	spot, decal;

# some setup
# fsize.w = filter size scaled by the projective w of the shadow coordinate
MUL		fsize.w, fragment.texcoord[1].w, filtersize.w;
# keep z/w of the shadow coordinate; only x/y are perturbed per tap
MOV		smcoord.zw, fragment.texcoord[1];
MUL		jcoord.xyz, fragment.position, jxyscale;	# assumes that z is set to 0, too
# shadow.w is the running (weighted) sum of shadow taps
MOV		shadow.w, 0;

# perform 8 'test' samples
# Each group below: fetch one jitter texel, step to the next jitter slice,
# take two shadow-map taps (offsets jitter.xy and jitter.zw), then
# DP2AX accumulates shadow.w += shadow.x * 1/8 + shadow.y * 1/8.

# samples 1/2

TEX		jitter, jcoord, texture[2], 3D;	# lookup jitter map
ADDH	jcoord.z, jcoord.z, 0.03125;	# increment lookup coord

MAD		smcoord.xy, jitter.xyxx, fsize.w, fragment.texcoord[1];
TXP		shadow.x, smcoord, texture[1], 2D;	# sample shadowmap

MAD		smcoord.xy, jitter.zwxx, fsize.w, fragment.texcoord[1];
TXP		shadow.y, smcoord, texture[1], 2D;	# sample shadowmap

DP2AX	shadow.w, shadow, oneover8, shadow.w;

# samples 3/4

TEX		jitter, jcoord, texture[2], 3D;	# lookup jitter map
ADDH	jcoord.z, jcoord.z, 0.03125;	# increment lookup coord

MAD		smcoord.xy, jitter.xyxx, fsize.w, fragment.texcoord[1];
TXP		shadow.x, smcoord, texture[1], 2D;	# sample shadowmap

MAD		smcoord.xy, jitter.zwxx, fsize.w, fragment.texcoord[1];
TXP		shadow.y, smcoord, texture[1], 2D;	# sample shadowmap

DP2AX	shadow.w, shadow, oneover8, shadow.w;

# samples 5/6

TEX		jitter, jcoord, texture[2], 3D;	# lookup jitter map
ADDH	jcoord.z, jcoord.z, 0.03125;	# increment lookup coord

MAD		smcoord.xy, jitter.xyxx, fsize.w, fragment.texcoord[1];
TXP		shadow.x, smcoord, texture[1], 2D;	# sample shadowmap

MAD		smcoord.xy, jitter.zwxx, fsize.w, fragment.texcoord[1];
TXP		shadow.y, smcoord, texture[1], 2D;	# sample shadowmap

DP2AX	shadow.w, shadow, oneover8, shadow.w;

# samples 7/8

TEX		jitter, jcoord, texture[2], 3D;	# lookup jitter map
ADDH	jcoord.z, jcoord.z, 0.03125;	# increment lookup coord

MAD		smcoord.xy, jitter.xyxx, fsize.w, fragment.texcoord[1];
TXP		shadow.x, smcoord, texture[1], 2D;	# sample shadowmap

MAD		smcoord.xy, jitter.zwxx, fsize.w, fragment.texcoord[1];
TXP		shadow.y, smcoord, texture[1], 2D;	# sample shadowmap

DP2AX	shadow.w, shadow, oneover8, shadow.w;

# normalize vectors
NRMH	normal, fragment.texcoord[2];
NRMH	lightv, fragment.texcoord[3];
NRMH	view, fragment.texcoord[4];

# diffuse dot product
DP3XC_SAT	diffuse.w, lightv, normal;

# c.w = (shadow.w - 1) * shadow.w * diffuse.w
# This is zero when the 8-tap estimate is exactly 0 or exactly 1 (all taps
# agree, assuming each tap returns 0 or 1) or when the surface faces away
# from the light. The C suffix on the last multiply writes the condition
# code that the IF below tests.
SUBX		c.w, shadow.w, 1;
MULX		c.w, c.w, shadow.w;
MULXC		c.w, c.w, diffuse.w;

IF NE.w;	# oversample only across shadow edge

	# Re-weight the 8 test taps from 1/8 each to 1/64 each so they can be
	# combined with the 56 taps below into a 64-tap average.
	MUL	shadow.w, shadow, oneover8;
	
	REP	28;		# the rest 56 samples ...

		TEX		jitter, jcoord, texture[2], 3D;	# lookup jitter map
		ADDH	jcoord.z, jcoord.z, 0.03125;#0.0078125;	# increment lookup coord

		MAD		smcoord.xy, jitter.xyxx, fsize.w, fragment.texcoord[1];
		TXP		shadow.x, smcoord, texture[1], 2D;	# sample shadowmap

		MAD		smcoord.xy, jitter.zwxx, fsize.w, fragment.texcoord[1];
		TXP		shadow.y, smcoord, texture[1], 2D;	# sample shadowmap

		# shadow.w += shadow.x * 1/64 + shadow.y * 1/64
		DP2AX	shadow.w, shadow, oneover64, shadow.w;

	ENDREP;

ENDIF;

# reflection vector from the normal and the view vector
RFLH	refl.xyz, normal, view;

# calculate specular term
DP3X_SAT	spec.w, refl, lightv;
POW		spec, spec.w, 64.0;

# Combine: shadow  = shadow.w * spot
#          diffuse = diffuse.w * decal * shadow + spec * shadow
TXP		spot, fragment.texcoord[1], texture[3], 2D;
TEX		decal, fragment.texcoord[0], texture[0], 2D;
MULX	shadow, shadow.w, spot;
MULX	diffuse, diffuse.w, decal;
MULX	diffuse, diffuse, shadow;
MADX	diffuse, spec, shadow, diffuse;

# final colour = 0.1 * decal (constant ambient term) + lit colour
MADX	result.color, decal, 0.1, diffuse;

END

OK, so no one has done it, but does anyone know how to convert it to GLSL and hopefully, the NV compiler will be able to generate the same low level instruction?

Example: DP2AX shadow.w, shadow, oneover8, shadow.w;

I’m going to test it soon, but I would appreciate it if anyone has an example of GLSL code using these NV extensions.

All the low-level code is explained in the extensions.

e.g. GL_NV_fragment_program2

Section 3.11.5.50, DP2A: 2-Component Dot Product w/Scalar Add

The DP2A instruction computes a two-component dot product of the two
operands (using the first two components), adds the x component of the
third operand, and replicates the result to all four components of the
result vector.
  tmp0 = VectorLoad(op0);
  tmp1 = VectorLoad(op1);
  tmp2 = VectorLoad(op2);
  dot = (tmp0.x * tmp1.x) + (tmp0.y * tmp1.y) + tmp2.x;
  result.x = dot;
  result.y = dot;
  result.z = dot;
  result.w = dot;

The additional X just means that it’s using a 12-bit fixed-point representation.

R = 32bit float
H = 16bit half
X = 12bit fixed
C = Clamp

N.

That’s not the problem. BTW, C isn’t for clamping.

I want to write the high level equivalent but don’t know how.

Maybe this helps. I converted a small part to GLSL for you:


// Partial GLSL translation of the ARB fragment program: setup plus the first
// pair of jittered shadow-map taps. The remaining taps follow the same pattern.

uniform vec4 filtersize;	// only .w is used (filter size)
uniform vec4 jxyscale;		// scales window position into jitter-map coordinates

const float oneover8 = 0.125;
const float oneover64 = 0.015625;

// Samplers must be declared uniform. The shadow map is sampled with a depth
// comparison, so it is declared as a shadow sampler.
uniform sampler2DShadow tex1;	// shadow map (texture[1] in the ARB program)
uniform sampler3D tex2;			// jitter map (texture[2] in the ARB program)

void main()
{
	vec4 smcoord;
	vec4 fsize, jcoord, shadow, jitter, c;
	// `half` is a reserved word in GLSL, so the temporary is named halfv here.
	vec4 normal, lightv, view, halfv, diffuse, refl, spec;
	vec4 spot, decal;

	// Filter size scaled by the projective w of the shadow coordinate.
	fsize.w		= gl_TexCoord[1].w*filtersize.w;
	// Keep z/w of the shadow coordinate; only x/y are perturbed per tap.
	smcoord.zw	= gl_TexCoord[1].zw;

	jcoord.xyz	= gl_FragCoord.xyz*jxyscale.xyz;
	shadow.w	= 0.0;

	// Jitter lookup: texture3D takes a vec3 coordinate.
	jitter 		= texture3D(tex2,jcoord.xyz);
	jcoord.z       += 0.03125;

	// TXP in the ARB program is a projective lookup, hence shadow2DProj.
	smcoord.xy	= jitter.xy*fsize.w+gl_TexCoord[1].xy;
	shadow.x	= shadow2DProj(tex1,smcoord).x;

	smcoord.xy	= jitter.zw*fsize.w+gl_TexCoord[1].xy;
	shadow.y	= shadow2DProj(tex1,smcoord).x;

	// Equivalent of: DP2AX shadow.w, shadow, oneover8, shadow.w;
	shadow.w	= shadow.x*oneover8+shadow.y*oneover8+shadow.w;

	// Etc.
}


N.

Yes, but that is standard GLSL.

If I use half, half2, half3, half4, cgc offline compiler is able to compile and generate the proper fp40 instructions, but if I run my program (which uses nvidia’s driver), it says

error C7506: OpenGL does not define the global type half2

Ah, I should just add support for NV_fragment_program2 in the engine and forget about this crap.

‘C’ is not for clamping - ‘C’ is conditional register masking, for writing only to those components, which are not zero.

About sources - -NiCo- is right, it disassembles very simple, especially with remarks and comments. I have them on HLSL.

I see, but halfs are not supported in GLSL, the keyword half is only reserved for future use. I don’t know why the offline compiler does it differently though…

Full half float support along with the pack/unpack instructions are the main reasons I switched to Cg.

I wonder if it would work if you write it in Cg and pass it on to GLSL through GL_EXT_Cg_shader.

N.

Nvidia drivers support the half data types, emitting a warning instead of an error, in some situations. That support is disabled by NVemulate if “Generate shader portability errors” is checked, and also whenever the shader contains the #version directive. If half data types are available, the preprocessor macro __GLSL_CG_DATA_TYPES is defined.

I removed the #version directive and now it compiles and the errors are now warnings. But how did you know about that trick?

It was written somewhere… Maybe even in the GLSL 1.20 spec, don’t know anymore and I am too lazy to search for it.

In Nvidia GLSL release notes

Good news. I got the GLSL version working.
Also, with the original low level shaders that Yury Uralsky has written, it gets 120FPS. With my GLSL code, it gets 126FPS.
I guess the GLSL compiler of NV is very advanced.

vertex shader


//VERTEX SHADER
//This is a GLSL version of soft_shadow.vp
//
//Outputs consumed by the fragment shader:
//  gl_TexCoord[0].xy  - decal texture coordinate
//  gl_TexCoord[1]     - projective coordinate (TextureMatrix1 * transformed vertex)
//  gl_TexCoord[2].xyz - transformed normal (not normalized here)
//  gl_TexCoord[3].xyz - normalized vector from the vertex to the light
//  gl_TexCoord[4].xyz - transformed vertex position, used as the view vector

uniform mat4 TextureMatrix1;
uniform vec4 LightPosition0;	//Presumably given in the same space as ModelviewMatrix * gl_Vertex - confirm against the application
uniform mat4 ModelviewMatrix;

void main()
{
	gl_Position = ftransform();

	gl_TexCoord[0].xy = gl_MultiTexCoord0.xy;

	//Vertex position transformed by the application-supplied modelview matrix
	vec4 eyeVertex = ModelviewMatrix * gl_Vertex;
	gl_TexCoord[4].xyz = eyeVertex.xyz;

	//Direction from the vertex towards the light
	gl_TexCoord[3].xyz = normalize(LightPosition0.xyz - eyeVertex.xyz);

	//w = 0.0 drops the translation part of the matrix.
	//NOTE(review): this is only a correct normal transform if ModelviewMatrix has no non-uniform scale.
	gl_TexCoord[2].xyz = vec3(ModelviewMatrix * vec4(gl_Normal, 0.0));

	//Projective shadow-map / spot-texture coordinate
	gl_TexCoord[1] = TextureMatrix1 * eyeVertex;
}


//FRAGMENT SHADER
//Same as soft_shadow64_nv4x.fp
//
//Jittered soft shadows: 8 estimation taps are always taken; the remaining 56
//taps are taken only where the estimate shows a shadow edge on a lit surface.
//
//NOTE: the half/half2/half3/half4 types are not standard GLSL. They are
//accepted (with warnings) by NVIDIA drivers, and only when the source has no
//#version directive.

uniform sampler2D Texture0;			//Color texture
uniform sampler2DShadow Texture1;	//Shadow map
uniform sampler3D Texture2;			//3D noise texture
uniform sampler2D Texture3;			//Projected texture

uniform float filtersize;
uniform vec4 jxyscale;

const half2 oneover8 = half2(0.125, 0.125);
const half2 oneover64 = half2(0.015625, 0.015625);

//Fetches the next jitter-map texel and takes the two shadow-map taps it
//encodes (offsets in .xy and .zw, scaled by radius).
//Returns the two shadow results in .xy and advances jcoord.z to the next
//jitter slice.
half2 shadowTapPair(inout half3 jcoord, half radius)
{
	//Lookup jitter map (texture3D takes a 3-component coordinate)
	half4 jitter = texture3D(Texture2, jcoord);
	//Increment lookup coord
	jcoord.z = jcoord.z + 0.03125;

	//z/w come from the interpolated coordinate; only x/y are perturbed
	vec4 smcoord = gl_TexCoord[1];
	half2 taps;

	smcoord.xy = jitter.xy * radius + gl_TexCoord[1].xy;
	//Sample shadowmap
	taps.x = shadow2DProj(Texture1, smcoord).x;

	smcoord.xy = jitter.zw * radius + gl_TexCoord[1].xy;
	//Sample shadowmap
	taps.y = shadow2DProj(Texture1, smcoord).x;

	return taps;
}

void main()
{
	half3 jcoord;
	half3 normal, lightv, view, refl;
	half4 spot, decal, shadow, lit;
	half fsize, coverage, ndotl, c, spec;

	//Some setup
	fsize = gl_TexCoord[1].w * filtersize;
	//Assumes that z is set to 0, too
	jcoord = gl_FragCoord.xyz * jxyscale.xyz;

	//Perform 8 'test' samples, each weighted 1/8
	//(four times: DP2AX	shadow.w, shadow, oneover8, shadow.w;)
	coverage = dot(shadowTapPair(jcoord, fsize), oneover8);				//Samples 1/2
	coverage = dot(shadowTapPair(jcoord, fsize), oneover8) + coverage;	//Samples 3/4
	coverage = dot(shadowTapPair(jcoord, fsize), oneover8) + coverage;	//Samples 5/6
	coverage = dot(shadowTapPair(jcoord, fsize), oneover8) + coverage;	//Samples 7/8

	//Normalize vectors
	normal = normalize(gl_TexCoord[2].xyz);
	lightv = normalize(gl_TexCoord[3].xyz);
	view = normalize(gl_TexCoord[4].xyz);

	//Diffuse dot product
	//DP3XC_SAT	diffuse.w, lightv, normal;
	ndotl = clamp(dot(lightv, normal), 0.0, 1.0);

	//Zero when the estimate is exactly 0 or 1 (all test samples agree)
	//or when the surface faces away from the light
	c = ((coverage - 1.0) * coverage) * ndotl;

	//Conditional code
	//If c is non zero, do the "if" portion
	if(c != 0.0)	//Oversample only across shadow edge
	{
		//Re-weight the 8 test samples from 1/8 to 1/64 each
		coverage = coverage * oneover8.x;

		//The remaining 56 samples, two per iteration
		for(int i=0; i<28; i++)
		{
			//DP2AX	shadow.w, shadow, oneover64, shadow.w;
			coverage = dot(shadowTapPair(jcoord, fsize), oneover64) + coverage;
		}
	}

	refl = reflect(view, normal);

	//Calculate specular term
	spec = pow(clamp(dot(refl, lightv), 0.0, 1.0), 64.0);

	spot = texture2DProj(Texture3, gl_TexCoord[1]);
	decal = texture2D(Texture0, gl_TexCoord[0].xy);

	//Shadow term modulated by the projected spot texture
	shadow = coverage * spot;
	//Diffuse plus specular, both attenuated by the shadow term
	lit = ndotl * decal * shadow;
	lit = spec * shadow + lit;

	//Constant ambient term (0.1 * decal) plus the lit colour
	gl_FragColor = clamp(decal * 0.1 + lit, 0.0, 1.0);
}


The #version trick was mentioned in an nVidia paper, or maybe in the cgc.exe itself. GLSL running on nVidia drivers can use half floats and the pack/unpack instructions.

This topic was automatically closed 183 days after the last reply. New replies are no longer allowed.