Part of the Khronos Group
OpenGL.org

The Industry's Foundation for High Performance Graphics

from games to virtual reality, mobile phones to supercomputers

Page 4 of 4 FirstFirst ... 234
Results 31 to 34 of 34

Thread: transform feedback + glDrawElementsInstanced

  1. #31
    Junior Member Regular Contributor peterfilm's Avatar
    Join Date
    Sep 2009
    Location
    UK
    Posts
    124
    well i asked for the limits on the quadro 4000, and got:-
    GL_MAX_VERTEX_ATOMIC_COUNTERS: 16384
    GL_MAX_GEOMETRY_ATOMIC_COUNTERS: 16384
    GL_MAX_FRAGMENT_ATOMIC_COUNTERS: 16384

    so i tried it, using atomic counters i mean, backed by a buffer.

    results (sorry, fps again):-

    instances: 25798
    triangles: 186696
    GPU: 705fps
    CPU: 410fps



    pretty damn good!
    I know this isn't a real stress test, but i'm having trouble with the tool that generates the instances...can't get enough of em to produce a realistic load.

  2. #32
    Junior Member Regular Contributor peterfilm's Avatar
    Join Date
    Sep 2009
    Location
    UK
    Posts
    124
    Code :
    #version 420 core
     
     
    #ifdef GL_VERTEX_SHADER
     
     
    // Per-instance inputs: three rows of the instance transform (axis in xyz,
    // translation component in w) and the instance's bounding sphere.
    in vec4 attrib_row1;		// xyz=axisX, w=translationX
    in vec4 attrib_row2;		// xyz=axisY, w=translationY
    in vec4 attrib_row3;		// xyz=axisZ, w=translationZ
    in vec4 attrib_bsphere;		// bounding sphere xyz=centre, w=radius
     
     
    // Outputs read by the geometry stage: the transform rows passed through
    // unchanged, plus a flag that main() sets to 1 (keep) or 0 (culled).
    out vec4 vsRow1;
    out vec4 vsRow2;
    out vec4 vsRow3;
    flat out int vsVisible;		// integer output, hence the flat qualifier
     
     
    // Each plane is dotted with (centre, 1.0) in main(), so xyz acts as the plane
    // normal and w as the plane offset.
    // NOTE(review): the sphere centre is presumably in world space too - confirm.
    uniform vec4 uni_frustum[6];	// the 6 world space frustum planes
     
     
    // Vertex stage: forwards the instance transform rows and flags whether the
    // instance's bounding sphere survives the frustum test.
    void main() {
    	// Pass the transform rows straight through to the next stage.
    	vsRow1 = attrib_row1;
    	vsRow2 = attrib_row2;
    	vsRow3 = attrib_row3;

    	vec4 sphereCentre = vec4(attrib_bsphere.xyz, 1.0);
    	float sphereRadius = attrib_bsphere.w;

    	// Start as visible; the first plane that has the whole sphere on its
    	// negative side clears the flag and ends the loop.
    	int visibleFlag = 1;
    	for (int plane = 0; plane < 6 && visibleFlag == 1; ++plane) {
    		float signedDist = dot(uni_frustum[plane], sphereCentre);
    		if (signedDist <= -sphereRadius) {
    			visibleFlag = 0;
    		}
    	}

    	vsVisible = visibleFlag;
    }
     
     
    #endif
     
     
    #ifdef GL_GEOMETRY_SHADER
     
     
    // One point in, at most one point out per invocation.
    layout(points) in;
    layout(points, max_vertices = 1) out;
     
     
    uniform vec3 uni_camPos;		// xyz=world space camera position
    uniform vec4 uni_lodDist;		// lod distances for x=lod0, y=lod1, z=lod2, w=lod3
     
     
    // Vertex-stage outputs; arrays of size 1 because the input primitive is a point.
    in vec4 vsRow1[1];
    in vec4 vsRow2[1];
    in vec4 vsRow3[1];
    flat in int vsVisible[1];
     
     
    // One set of transform rows per output vertex stream (streams 0..3);
    // main() writes exactly one set, chosen by LOD, before emitting on that stream.
    layout(stream=0) out vec4 gsOut0Row1;
    layout(stream=0) out vec4 gsOut0Row2;
    layout(stream=0) out vec4 gsOut0Row3;
    layout(stream=1) out vec4 gsOut1Row1;
    layout(stream=1) out vec4 gsOut1Row2;
    layout(stream=1) out vec4 gsOut1Row3;
    layout(stream=2) out vec4 gsOut2Row1;
    layout(stream=2) out vec4 gsOut2Row2;
    layout(stream=2) out vec4 gsOut2Row3;
    layout(stream=3) out vec4 gsOut3Row1;
    layout(stream=3) out vec4 gsOut3Row2;
    layout(stream=3) out vec4 gsOut3Row3;
     
     
    // Per-LOD counters, all in the buffer at atomic counter binding 0, spaced
    // 20 bytes apart starting at byte 4; main() increments one per emitted instance.
    // NOTE(review): the 4/24/44/64 offsets presumably line up with the instance-count
    // field of consecutive 20-byte indirect draw commands - confirm against host code.
    layout(binding = 0, offset = 4) uniform atomic_uint LodCount0;
    layout(binding = 0, offset = 24) uniform atomic_uint LodCount1;
    layout(binding = 0, offset = 44) uniform atomic_uint LodCount2;
    layout(binding = 0, offset = 64) uniform atomic_uint LodCount3;
     
     
    // Geometry stage: routes each visible instance to the output stream of the
    // first LOD band whose distance limit exceeds the camera distance, and bumps
    // that band's counter. Instances flagged invisible, or at/after the last
    // limit, emit nothing.
    void main() {
    	if (vsVisible[0] != 1) {
    		return;
    	}

    	// The instance position is the translation held in the w of each row.
    	vec3 instancePos = vec3(vsRow1[0].w, vsRow2[0].w, vsRow3[0].w);
    	float camDist = distance(instancePos, uni_camPos);

    	// EmitStreamVertex needs a constant stream index, so every band gets
    	// its own explicit branch rather than a computed index.
    	if (camDist < uni_lodDist.x) {
    		gsOut0Row1 = vsRow1[0];
    		gsOut0Row2 = vsRow2[0];
    		gsOut0Row3 = vsRow3[0];
    		atomicCounterIncrement(LodCount0);
    		EmitStreamVertex(0);
    		return;
    	}

    	if (camDist < uni_lodDist.y) {
    		gsOut1Row1 = vsRow1[0];
    		gsOut1Row2 = vsRow2[0];
    		gsOut1Row3 = vsRow3[0];
    		atomicCounterIncrement(LodCount1);
    		EmitStreamVertex(1);
    		return;
    	}

    	if (camDist < uni_lodDist.z) {
    		gsOut2Row1 = vsRow1[0];
    		gsOut2Row2 = vsRow2[0];
    		gsOut2Row3 = vsRow3[0];
    		atomicCounterIncrement(LodCount2);
    		EmitStreamVertex(2);
    		return;
    	}

    	if (camDist < uni_lodDist.w) {
    		gsOut3Row1 = vsRow1[0];
    		gsOut3Row2 = vsRow2[0];
    		gsOut3Row3 = vsRow3[0];
    		atomicCounterIncrement(LodCount3);
    		EmitStreamVertex(3);
    	}
    }
     
     
    #endif

  3. #33
    Advanced Member Frequent Contributor
    Join Date
    Apr 2010
    Location
    Germany
    Posts
    892
    Just a minor observation:

    Code :
    // Distance from the instance translation (the w of each transform row) to the camera.
    float dist = distance(vec3(vsRow1[0].w, vsRow2[0].w, vsRow3[0].w), uni_camPos);


    I can't tell if it will have a significant impact in your case but if the range of values permits you could use square distance to get rid of the sqrt here:

    Code :
    // Squared distance from the instance translation to the camera: dotting the
    // offset vector with itself avoids the sqrt, so it must be compared against
    // squared LOD thresholds.
    vec3 distVec = vec3(vsRow1[0].w, vsRow2[0].w, vsRow3[0].w) - uni_camPos;
    float sqrDist = dot(distVec, distVec);

    Of course you'll have to account for that during LOD selection as well, i.e. store squared distances in uni_lodDist.

  4. #34
    Junior Member Regular Contributor peterfilm's Avatar
    Join Date
    Sep 2009
    Location
    UK
    Posts
    124
    yup, i know, this is a simple test - i found early on that it made no real difference to performance on the GPU but did on the CPU so I decided to leave it with true length on both implementations to make it fair.

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •