Hello:
I am using two fragment programs to run a physics simulation on the GPU instead of in software on the CPU. I don’t need to see the frame until the simulation is finished; the point of using the GPU is to go much faster than the algorithm runs on the CPU. I am on Windows XP with a GeForce FX 5900 Ultra and the 45.23 driver. My 2D simulation uses three 512x512 textures and a viewport of the same size.

My problem: 500 passes run very fast and my CPU usage stays low. However, at higher pass counts, say 1000, the execution time does not scale linearly, even though each pass does exactly the same work. At the higher counts my CPU usage also shoots up to 100% and stays there until the run is done. Here is my rendering code:
void Process_CTSI()
{
    int i;

    if (CurrentStep == 0)
        start_time = clock();

    for (i = 0; i < NumSteps; i++)
    {
        // Pass 1: fragment program A reads textures A and B, writes to the framebuffer.
        glBindProgramNV(GL_FRAGMENT_PROGRAM_NV, fp_A_ID);
        glEnable(GL_FRAGMENT_PROGRAM_NV);
        glActiveTextureARB(GL_TEXTURE0_ARB);
        glBindTexture(GL_TEXTURE_2D, TextureID_A);
        glActiveTextureARB(GL_TEXTURE1_ARB);
        glBindTexture(GL_TEXTURE_2D, TextureID_B);
        glBegin(GL_TRIANGLE_STRIP);
            glTexCoord2f(0, 1); glVertex2f(-1,  1);
            glTexCoord2f(0, 0); glVertex2f(-1, -1);
            glTexCoord2f(1, 1); glVertex2f( 1,  1);
            glTexCoord2f(1, 0); glVertex2f( 1, -1);
        glEnd();

        // Copy the result back into texture A (render-to-texture via copy).
        glBindTexture(GL_TEXTURE_2D, TextureID_A);
        glCopyTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 0, 0, iWidth, iHeight);

        // Pass 2: fragment program B reads textures B, A, and C.
        glBindProgramNV(GL_FRAGMENT_PROGRAM_NV, fp_B_ID);
        glActiveTextureARB(GL_TEXTURE0_ARB);
        glBindTexture(GL_TEXTURE_2D, TextureID_B);
        glActiveTextureARB(GL_TEXTURE1_ARB);
        glBindTexture(GL_TEXTURE_2D, TextureID_A);
        glActiveTextureARB(GL_TEXTURE2_ARB);
        glBindTexture(GL_TEXTURE_2D, TextureID_C);
        glBegin(GL_TRIANGLE_STRIP);
            glTexCoord2f(0, 1); glVertex2f(-1,  1);
            glTexCoord2f(0, 0); glVertex2f(-1, -1);
            glTexCoord2f(1, 1); glVertex2f( 1,  1);
            glTexCoord2f(1, 0); glVertex2f( 1, -1);
        glEnd();
        glDisable(GL_FRAGMENT_PROGRAM_NV);

        // Inject this step's input value by additively blending a line into the buffer.
        glColor4f(Data[CurrentStep], 0.0, 0.0, 0.0);
        glEnable(GL_BLEND);
        glBlendFunc(GL_ONE, GL_ONE);
        glBegin(GL_LINES);
            glVertex2f(-1.0, -0.75);
            glVertex2f( 1.0, -0.75);
        glEnd();

        // Copy the blended result back into texture B for the next iteration.
        glBindTexture(GL_TEXTURE_2D, TextureID_B);
        glCopyTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 0, 0, iWidth, iHeight);
        glDisable(GL_BLEND);

        CurrentStep++;
    }

    if (CurrentStep == NumSteps)
    {
        glutSwapBuffers();
        end_time = clock();
        simulation_time = (double)(end_time - start_time) / CLOCKS_PER_SEC;
        cout << "Start Time = " << start_time << endl;
        cout << "End Time = " << end_time << endl;
        cout << "RTT simulation_time = " << simulation_time << endl;
        cout << "NumSteps = " << NumSteps << endl;
        cout.flush();
    }
}
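One thing I’m not sure about with the timing itself: glutSwapBuffers() returns before the GPU has necessarily finished executing everything, so maybe clock() is really measuring command submission rather than execution, and the driver starts busy-waiting once its command buffer fills up at the higher pass counts. I was thinking of forcing completion before reading the clock, roughly like this (just a sketch of what I mean):

    glutSwapBuffers();
    glFinish();           // block until every queued GL command has actually executed
    end_time = clock();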
Can anyone tell me why it runs so much slower when NumSteps = 1000 compared to NumSteps = 500, and why the CPU usage goes so high with the longer run? Also, when I put the above rendering code into two separate display lists and call them in the loop, with the glColor4f(Data[],…) call in between, it actually runs slower than without the display lists. Shouldn’t it run faster? Confused.
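For reference, the display-list variant looks roughly like this (dl_pass1 and dl_pass2 are just placeholder names for the two list IDs, compiled once at startup; I may not have the split in exactly the right place here):

    for (i = 0; i < NumSteps; i++)
    {
        glCallList(dl_pass1);                    // first fragment program pass + copy
        glColor4f(Data[CurrentStep], 0, 0, 0);   // kept outside the lists, since a compiled
                                                 // list would bake in one fixed Data value
        glCallList(dl_pass2);                    // second pass, blended line, copy
        CurrentStep++;
    }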
Thanks.
[This message has been edited by sek (edited 09-30-2003).]