pocketmoon

05-30-2002, 12:42 AM

Hi oglers

Slightly OT? I'm trying to get a couple of octaves of Perlin-esque noise out of a vertex shader, with room to spare for some other calcs.

The smallest 3D noise shader I've seen is the vnoise effect from the Nvidia SDK/Effects browser, which is 127 instructions for 1 octave of 3D noise. This saves instructions by doing a 32-element array lookup and doesn't use an 'ease' function to interpolate between lattice values.

The one I've come up with is 66 instructions; it uses a hashing function to determine the lattice noise values (rather than a lookup) and does a nice ease between them for smooth noise.

the basic hash function is:

n = x+y*57.0+z*17.0;

n = (n*(n*n*15731.0+789221.0)+1376312589.0);

n = n / 65535.0;

n = n - floor(n);

n = n * 2.0 - 1.0;

The vertex shader follows, so any comments/ideas on making it smaller are welcome! It's a new version that I haven't tested yet, so there may be some typos.

!!VP1.0
# Vertex program procedural noise
# Faked Perlin-style 3D noise: repeatable pseudo-random numbers for a given
# vertex position.
# Hashes the 8 lattice corners surrounding the scaled position and
# interpolates *trilinearly* between them, using an f*f*(3 - 2f) ease
# curve on each axis, to give a smooth result.
#
# v1.1
# 66 instructions but I'm sure I can save a few more ;)
# Rob James 2002
# pocketmoon@ntlworld.com
#
# Constant registers read by this program:
#   c[0]..c[3]  one DP4 each against the displaced position -> o[HPOS].xyzw
#   c[12]       .w is copied into the displaced position's w
#               (presumably 1.0 - confirm against the application)
#   c[13]       freq amp 3.0 1.0
#   c[16]       1 2 3 4
#   c[18]       1 57 17 0
#   c[19]       15731.0 789221.0 1376312589.0
#   c[20]       .x multiplies the hash before its fractional part is taken;
#               the reference formula divides by 65535, so presumably
#               1.0/65535.0 - confirm against the application
#
# Vertex attributes read: v[OPOS], v[NRML].
# Outputs written:        o[HPOS], o[COL0].

# scale up the vector: R0 = position * freq + c[13].w
MAD R0, v[OPOS], c[13].x, c[13].w;

# calc the fractional parts and store in R10.xyz
# (EXP places src - floor(src) in the .y lane of its result, so x and z
#  have to go through a scratch register)
EXP R3.y, R0.x;
MOV R10.x, R3.y;
EXP R10.y, R0.y;
EXP R3.y, R0.z;
MOV R10.z, R3.y;

# floor(R0) = R0 - frac(R0)
ADD R0, R0, -R10;

# calc the ease weights sx sy sz and stick them in R11:
#   s = f * f * (3 - 2 * f)
MAD R3, R10, -c[16].yyyy, c[16].zzzz;
MUL R4, R3, R10;
MUL R11, R4, R10;

# now hash the eight lattice corners into R3 and R4 using
#   n = x + 57 * y + 17 * z
# R3 holds the four corners at x, R4 the four corners at x+1, and both use
# the same lane layout so one vector lerp blends all four pairs along x:
#   .x = (y,   z)    .y = (y+1, z)
#   .z = (y,   z+1)  .w = (y+1, z+1)
# R0 is stepped one axis at a time so each corner costs one ADD + one DP3.
DP3 R3.x, R0, c[18];            # (x,   y,   z  )
ADD R0.y, R0.y, c[18].x;
DP3 R3.y, R0, c[18];            # (x,   y+1, z  )
ADD R0.z, R0.z, c[18].x;
DP3 R3.w, R0, c[18];            # (x,   y+1, z+1)
ADD R0.y, R0.y, -c[18].x;
DP3 R3.z, R0, c[18];            # (x,   y,   z+1)
ADD R0.x, R0.x, c[18].x;
DP3 R4.z, R0, c[18];            # (x+1, y,   z+1)
ADD R0.y, R0.y, c[18].x;
DP3 R4.w, R0, c[18];            # (x+1, y+1, z+1)
ADD R0.z, R0.z, -c[18].x;
DP3 R4.y, R0, c[18];            # (x+1, y+1, z  )
ADD R0.y, R0.y, -c[18].x;
DP3 R4.x, R0, c[18];            # (x+1, y,   z  )

# now do the mashing!
#   n = n * (n * n * 15731 + 789221) + 1376312589
#   n = n * c[20].x          (n / 65535 in the reference formula)
# dont ask how I came by those numbers!
# NOTE(review): these magnitudes exceed what a 32-bit float holds exactly,
# so low-order bits of n are probably lost before the fraction is taken -
# verify the noise quality on real hardware.
MUL R1, R3, R3;
MAD R2, R1, c[19].xxxx, c[19].yyyy;
MAD R1, R3, R2, c[19].zzzz;
MUL R3, R1, c[20].xxxx;

MUL R1, R4, R4;
MAD R2, R1, c[19].xxxx, c[19].yyyy;
MAD R1, R4, R2, c[19].zzzz;
MUL R4, R1, c[20].xxxx;

# keep fractional part of all eight hashes
# (one scalar EXP per lane, fraction comes back in .y)
EXP R1.y, R3.x;
MOV R3.x, R1.y;
EXP R1.y, R3.y;
MOV R3.y, R1.y;
EXP R1.y, R3.z;
MOV R3.z, R1.y;
EXP R1.y, R3.w;
MOV R3.w, R1.y;
EXP R1.y, R4.x;
MOV R4.x, R1.y;
EXP R1.y, R4.y;
MOV R4.y, R1.y;
EXP R1.y, R4.z;
MOV R4.z, R1.y;
EXP R1.y, R4.w;
MOV R4.w, R1.y;

# multiply by 2 and subtract 1
# this gives a range -1 to 1
MAD R1, R3, c[16].yyyy, -c[16].xxxx;
MAD R2, R4, c[16].yyyy, -c[16].xxxx;

# a = u + t * (v - u)
# NB these are plain lerps weighted by the ease curve, not proper slerps ;)
# This does 4 at once: blend the x face (R1) towards the x+1 face (R2)
ADD R4, R2, -R1;
MAD R3, R4, R11.xxxx, R1;

# two lerps along y: only lanes .x (z slice) and .z (z+1 slice) of R2 are
# meaningful afterwards; the source swizzle pairs .x with .y and .z with .w
ADD R1, R3.ywww, -R3;
MAD R2, R1, R11.yyyy, R3;

# one final lerp along z: blend R2.x towards R2.z, result in R3.x
MOV R3.x, R2.z;
ADD R1, R3, -R2;
MAD R3, R1, R11.zzzz, R2;

# displace the vertex along its normal by noise * amp
MOV R8.w, c[12].w;
MUL R8.xyz, v[NRML], R3.x;
MAD R8.xyz, R8, c[13].y, v[OPOS];

# colour = |R3| (only .x carries the finished noise value)
MAX o[COL0], R3, -R3;

# transform the displaced position with c[0]..c[3]
DP4 o[HPOS].x, c[0], R8;
DP4 o[HPOS].y, c[1], R8;
DP4 o[HPOS].z, c[2], R8;
DP4 o[HPOS].w, c[3], R8;
END

Slightly OT? I'm trying to get a couple of octaves of Perlin-esque noise out of a vertex shader, with room to spare for some other calcs.

The smallest 3D noise shader I've seen is the vnoise effect from the Nvidia SDK/Effects browser, which is 127 instructions for 1 octave of 3D noise. This saves instructions by doing a 32-element array lookup and doesn't use an 'ease' function to interpolate between lattice values.

The one I've come up with is 66 instructions; it uses a hashing function to determine the lattice noise values (rather than a lookup) and does a nice ease between them for smooth noise.

the basic hash function is:

n = x+y*57.0+z*17.0;

n = (n*(n*n*15731.0+789221.0)+1376312589.0);

n = n / 65535.0;

n = n - floor(n);

n = n * 2.0 - 1.0;

The vertex shader follows, so any comments/ideas on making it smaller are welcome! It's a new version that I haven't tested yet, so there may be some typos.

!!VP1.0
# Vertex program procedural noise
# Faked Perlin-style 3D noise: repeatable pseudo-random numbers for a given
# vertex position.
# Hashes the 8 lattice corners surrounding the scaled position and
# interpolates *trilinearly* between them, using an f*f*(3 - 2f) ease
# curve on each axis, to give a smooth result.
#
# v1.1
# 66 instructions but I'm sure I can save a few more ;)
# Rob James 2002
# pocketmoon@ntlworld.com
#
# Constant registers read by this program:
#   c[0]..c[3]  one DP4 each against the displaced position -> o[HPOS].xyzw
#   c[12]       .w is copied into the displaced position's w
#               (presumably 1.0 - confirm against the application)
#   c[13]       freq amp 3.0 1.0
#   c[16]       1 2 3 4
#   c[18]       1 57 17 0
#   c[19]       15731.0 789221.0 1376312589.0
#   c[20]       .x multiplies the hash before its fractional part is taken;
#               the reference formula divides by 65535, so presumably
#               1.0/65535.0 - confirm against the application
#
# Vertex attributes read: v[OPOS], v[NRML].
# Outputs written:        o[HPOS], o[COL0].

# scale up the vector: R0 = position * freq + c[13].w
MAD R0, v[OPOS], c[13].x, c[13].w;

# calc the fractional parts and store in R10.xyz
# (EXP places src - floor(src) in the .y lane of its result, so x and z
#  have to go through a scratch register)
EXP R3.y, R0.x;
MOV R10.x, R3.y;
EXP R10.y, R0.y;
EXP R3.y, R0.z;
MOV R10.z, R3.y;

# floor(R0) = R0 - frac(R0)
ADD R0, R0, -R10;

# calc the ease weights sx sy sz and stick them in R11:
#   s = f * f * (3 - 2 * f)
MAD R3, R10, -c[16].yyyy, c[16].zzzz;
MUL R4, R3, R10;
MUL R11, R4, R10;

# now hash the eight lattice corners into R3 and R4 using
#   n = x + 57 * y + 17 * z
# R3 holds the four corners at x, R4 the four corners at x+1, and both use
# the same lane layout so one vector lerp blends all four pairs along x:
#   .x = (y,   z)    .y = (y+1, z)
#   .z = (y,   z+1)  .w = (y+1, z+1)
# R0 is stepped one axis at a time so each corner costs one ADD + one DP3.
DP3 R3.x, R0, c[18];            # (x,   y,   z  )
ADD R0.y, R0.y, c[18].x;
DP3 R3.y, R0, c[18];            # (x,   y+1, z  )
ADD R0.z, R0.z, c[18].x;
DP3 R3.w, R0, c[18];            # (x,   y+1, z+1)
ADD R0.y, R0.y, -c[18].x;
DP3 R3.z, R0, c[18];            # (x,   y,   z+1)
ADD R0.x, R0.x, c[18].x;
DP3 R4.z, R0, c[18];            # (x+1, y,   z+1)
ADD R0.y, R0.y, c[18].x;
DP3 R4.w, R0, c[18];            # (x+1, y+1, z+1)
ADD R0.z, R0.z, -c[18].x;
DP3 R4.y, R0, c[18];            # (x+1, y+1, z  )
ADD R0.y, R0.y, -c[18].x;
DP3 R4.x, R0, c[18];            # (x+1, y,   z  )

# now do the mashing!
#   n = n * (n * n * 15731 + 789221) + 1376312589
#   n = n * c[20].x          (n / 65535 in the reference formula)
# dont ask how I came by those numbers!
# NOTE(review): these magnitudes exceed what a 32-bit float holds exactly,
# so low-order bits of n are probably lost before the fraction is taken -
# verify the noise quality on real hardware.
MUL R1, R3, R3;
MAD R2, R1, c[19].xxxx, c[19].yyyy;
MAD R1, R3, R2, c[19].zzzz;
MUL R3, R1, c[20].xxxx;

MUL R1, R4, R4;
MAD R2, R1, c[19].xxxx, c[19].yyyy;
MAD R1, R4, R2, c[19].zzzz;
MUL R4, R1, c[20].xxxx;

# keep fractional part of all eight hashes
# (one scalar EXP per lane, fraction comes back in .y)
EXP R1.y, R3.x;
MOV R3.x, R1.y;
EXP R1.y, R3.y;
MOV R3.y, R1.y;
EXP R1.y, R3.z;
MOV R3.z, R1.y;
EXP R1.y, R3.w;
MOV R3.w, R1.y;
EXP R1.y, R4.x;
MOV R4.x, R1.y;
EXP R1.y, R4.y;
MOV R4.y, R1.y;
EXP R1.y, R4.z;
MOV R4.z, R1.y;
EXP R1.y, R4.w;
MOV R4.w, R1.y;

# multiply by 2 and subtract 1
# this gives a range -1 to 1
MAD R1, R3, c[16].yyyy, -c[16].xxxx;
MAD R2, R4, c[16].yyyy, -c[16].xxxx;

# a = u + t * (v - u)
# NB these are plain lerps weighted by the ease curve, not proper slerps ;)
# This does 4 at once: blend the x face (R1) towards the x+1 face (R2)
ADD R4, R2, -R1;
MAD R3, R4, R11.xxxx, R1;

# two lerps along y: only lanes .x (z slice) and .z (z+1 slice) of R2 are
# meaningful afterwards; the source swizzle pairs .x with .y and .z with .w
ADD R1, R3.ywww, -R3;
MAD R2, R1, R11.yyyy, R3;

# one final lerp along z: blend R2.x towards R2.z, result in R3.x
MOV R3.x, R2.z;
ADD R1, R3, -R2;
MAD R3, R1, R11.zzzz, R2;

# displace the vertex along its normal by noise * amp
MOV R8.w, c[12].w;
MUL R8.xyz, v[NRML], R3.x;
MAD R8.xyz, R8, c[13].y, v[OPOS];

# colour = |R3| (only .x carries the finished noise value)
MAX o[COL0], R3, -R3;

# transform the displaced position with c[0]..c[3]
DP4 o[HPOS].x, c[0], R8;
DP4 o[HPOS].y, c[1], R8;
DP4 o[HPOS].z, c[2], R8;
DP4 o[HPOS].w, c[3], R8;
END