I have written and debugged a new version that computes the difference between the reference values given by sin/cos and the approximated/optimised versions of them:
#include <chrono>
#include <iostream>
#include <unistd.h>
#include <time.h>
#include <math.h>
// Floating-point type used for the angles, results and statistics of the benchmark.
#define PRECISION float
// Number of iterations executed by every benchmark / check loop.
#define BENCHSIZE 10000000
// When defined, the approximations first wrap their input angle to -PI..PI.
#define WRAPPING 1
// #define BENCHINC 0.000001f
// Angle increment added after every loop iteration: (2*PI / BENCHSIZE) * 2.
PRECISION BENCHINC = (6.28318531 / BENCHSIZE) * 2.0f;
// Negated span covered by BENCHSIZE steps of BENCHINC; the loops in this file start at -BENCHSTART.
PRECISION BENCHSTART = -BENCHSIZE * BENCHINC;
///////////////////////////
// Chronometer functions //
///////////////////////////
// Shorthand names for the <chrono> facilities used by the chronometer.
using Clock = std::chrono::high_resolution_clock;
using std::chrono::milliseconds;
using std::chrono::nanoseconds;
using std::chrono::duration_cast;

// Time points delimiting the most recent measurement
// (overwritten by BeginTimer() and EndTimer()).
auto start = Clock::now();
auto end = Clock::now();
// Milliseconds between the two initial time points; not read by the timer functions.
auto delai = duration_cast<milliseconds>(end-start).count();

// Marks the beginning of a measurement.
inline void BeginTimer()
{
    start = Clock::now();
}

// Marks the end of a measurement.
inline void EndTimer()
{
    end = Clock::now();
}

// Prints "<title> : <elapsed> ms " for the last BeginTimer()/EndTimer() pair.
//
// title: label written in front of the elapsed time; it is only read, hence
//        the pointer-to-const parameter (string literals can be passed).
void PrintTimer(const char *title)
{
    // std::cout << duration_cast<nanoseconds>(end-start).count() << " ns\n";
    std::cout << title << " : " << duration_cast<milliseconds>(end - start).count() << " ms ";
}
///////////////////////////////
// Tests standards functions //
///////////////////////////////
void TestStandardSinus(char *title)
{
int i;
PRECISION val, sinus;
BeginTimer();
for( i = 0, val = -BENCHSTART; i < BENCHSIZE ; i++ , val += BENCHINC)
{
sinus = sin(val);
}
EndTimer();
PrintTimer(title);
}
void TestStandardCosinus(char *title)
{
int i;
PRECISION val, cosinus;
BeginTimer();
for( i = 0, val = -BENCHSTART; i < BENCHSIZE ; i++ , val += BENCHINC)
{
cosinus = cos(val);
}
EndTimer();
PrintTimer(title);
}
//////////////////////////////////
// Fast approximation functions //
//////////////////////////////////
// Parabolic approximation of sin(x).
//
// With WRAPPING defined the angle is first brought into -PI..PI; the result is
// then 1.27323954*x + 0.405284735*x*x for negative x and
// 1.27323954*x - 0.405284735*x*x otherwise.
//
// x: angle in radians.
// Returns the approximated sine; NaN when x is NaN or infinite.
inline PRECISION fast_sin(PRECISION x)
{
#ifdef WRAPPING
    // wrap input angle to -PI..PI (angles already in range skip this block)
    if (x < -3.14159265 || x > 3.14159265)
    {
        // Stepping one period at a time never finishes for infinities or for
        // magnitudes where x +/- 2*PI rounds back to x, and takes very long
        // for large finite angles. Beyond roughly 16 periods the angle is
        // therefore reduced with a single remainder() call; nearer angles keep
        // the cheap loops.
        if (!(x > -100.0 && x < 100.0))
            x = remainder(static_cast<double>(x), 6.28318531);
        while (x < -3.14159265)
            x += 6.28318531;
        while (x > 3.14159265)
            x -= 6.28318531;
    }
#endif
    // compute sine
    if (x < 0)
        return 1.27323954 * x + 0.405284735 * x * x;
    return 1.27323954 * x - 0.405284735 * x * x;
}
// Parabolic approximation of cos(x), evaluated as the sine approximation of
// x + PI/2.
//
// With WRAPPING defined the shifted angle is first brought into -PI..PI.
//
// x: angle in radians.
// Returns the approximated cosine; NaN when x is NaN or infinite.
inline PRECISION fast_cos( PRECISION x)
{
    // cos(x) = sin(x + PI/2)
    x += 1.57079632;
#ifdef WRAPPING
    // wrap input angle to -PI..PI (angles already in range skip this block)
    if (x < -3.14159265 || x > 3.14159265)
    {
        // Stepping one period at a time never finishes for infinities or for
        // magnitudes where x +/- 2*PI rounds back to x, and takes very long
        // for large finite angles. Beyond roughly 16 periods the angle is
        // therefore reduced with a single remainder() call; nearer angles keep
        // the cheap loops.
        if (!(x > -100.0 && x < 100.0))
            x = remainder(static_cast<double>(x), 6.28318531);
        while (x < -3.14159265)
            x += 6.28318531;
        while (x > 3.14159265)
            x -= 6.28318531;
    }
#endif
    if (x < 0)
        return 1.27323954 * x + 0.405284735 * x * x;
    return 1.27323954 * x - 0.405284735 * x * x;
}
////////////////////////////////////////
// Tests fast approximation functions //
////////////////////////////////////////
void TestFastSinus(char *title)
{
int i;
PRECISION val, sinus;
PRECISION ref, diff, moy = 0, variance = 0, mini = 1.0f , maxi = -1.0f;
BeginTimer();
for( i = 0, val = -BENCHSTART; i < BENCHSIZE ; i++ , val += BENCHINC)
{
sinus = fast_sin(val);
}
EndTimer();
PrintTimer(title);
for( i = 0, val = -BENCHSTART; i < BENCHSIZE ; i++ , val += BENCHINC)
{
ref = sin(val);
sinus = fast_sin(val);
diff = sinus - ref;
moy += diff;
variance += diff * diff;
if ( diff < mini )
mini = diff;
if ( diff > maxi )
maxi = diff;
}
moy /= i;
variance = sqrt(variance / i);
// std::cout << "(min/moy/max=" << mini << "/" << moy << "/" << maxi << ")";
printf("(min/moy/variance/max)=(%1.9f/%1.9f/%1.9f/%1.9f)", mini, moy, variance, maxi);
}
void TestFastCosinus(char *title)
{
int i;
PRECISION val, cosinus;
PRECISION ref, diff, moy = 0, variance = 0, mini = 1.0f , maxi = -1.0f;
BeginTimer();
for( i = 0, val = -BENCHSTART; i < BENCHSIZE ; i++ , val += BENCHINC)
{
cosinus = fast_cos(val);
}
EndTimer();
PrintTimer(title);
for( i = 0, val = -BENCHSTART; i < BENCHSIZE ; i++ , val += BENCHINC)
{
ref = cos(val);
cosinus = fast_cos(val);
diff = cosinus - ref;
moy += diff;
variance += diff*diff;
if ( diff < mini )
mini = diff;
if ( diff > maxi )
maxi = diff;
}
moy /= i;
variance = sqrt(variance / i);
//std::cout << "(min/moy/max=" << mini << "/" << moy << "/" << maxi << ")";
printf("(min/moy/variance/max)=(%1.9f/%1.9f/%1.9f/%1.9f)", mini, moy, variance, maxi);
}
///////////////////////////////
// Tests ultra fast versions //
///////////////////////////////
void CheckUltraFastSinus()
{
int i;
PRECISION val, sinus, x;
PRECISION ref, diff, moy = 0, variance=0, mini = 1.0f , maxi = -1.0f;
for( i = 0, val = -BENCHSTART; i < BENCHSIZE ; i++ , val += BENCHINC)
{
x = val;
ref = sin(val);
#ifdef WRAPPING
// wrap input angle to -PI..PI
if ( x < -3.14159265 )
while ( x < -3.14159265)
x += 6.28318531 ;
// else
if ( x > 3.14159265)
while ( x > 3.14159265)
x -= 6.28318531;
#endif
//compute sine
if (x < 0)
sinus = 1.27323954 * x + .405284735 * x * x;
else
sinus = 1.27323954 * x - 0.405284735 * x * x;
// check diff
diff = sinus - ref;
moy += diff;
variance += diff * diff;
if ( diff < mini )
mini = diff;
if ( diff > maxi )
maxi = diff;
}
moy /= i;
variance = sqrt(variance / i);
//std::cout << "(min/moy/max=" << mini << "/" << moy << "/" << maxi << ")";
printf("(min/moy/variance/max=(%1.9f/%1.9f/%1.9f/%1.9f)", mini, moy, variance, maxi);
}
void TestUltraFastSinus(char *title)
{
int i;
PRECISION val, sinus, x;
BeginTimer();
for( i = 0, val = -BENCHSTART; i < BENCHSIZE ; i++ , val += BENCHINC)
{
x = val;
#ifdef WRAPPING
// wrap input angle to -PI..PI
if ( x < -3.14159265 )
while ( x < -3.14159265)
x += 6.28318531 ;
// else
if ( x > 3.14159265)
while ( x > 3.14159265)
x -= 6.28318531 ;
#endif
//compute sine
if (x < 0)
sinus = 1.27323954 * x + 0.405284735 * x * x;
else
sinus = 1.27323954 * x - 0.405284735 * x * x;
}
EndTimer();
PrintTimer(title);
CheckUltraFastSinus();
}
void CheckUltraFastCosinus()
{
int i;
PRECISION val, cosinus, x;
PRECISION ref, diff, moy = 0, variance = 0, mini = 1.0f, maxi = -1.0f;
for( i = 0, val = -BENCHSTART; i < BENCHSIZE ; i++ , val += BENCHINC)
{
// cos(x) = sin(x + PI/2)
x = val + 1.57079632;
ref = cos(val);
#ifdef WRAPPING
// wrap input angle to -PI..PI
if ( x < -3.14159265 )
while ( x < -3.14159265)
x += 6.28318531 ;
// else
if ( x > 3.14159265)
while ( x > 3.14159265)
x -= 6.28318531;
#endif
//compute cosine
if (x < 0)
cosinus = 1.27323954 * x + 0.405284735 * x * x;
else
cosinus = 1.27323954 * x - 0.405284735 * x * x;
// check diff
diff = cosinus - ref;
moy += diff;
variance += diff * diff;
if ( diff < mini )
mini = diff;
if ( diff > maxi )
maxi = diff;
}
moy /= i;
variance = sqrt(variance / i);
//std::cout << "(min/moy/max=" << mini << "/" << moy << "/" << maxi << ")";
printf("(min/moy/variance/max)=(%1.9f/%1.9f/%1.9f/%1.9f)", mini, moy, variance, maxi);
}
void TestUltraFastCosinus(char *title)
{
int i;
PRECISION val, cosinus, x;
BeginTimer();
for( i = 0, val = -BENCHSTART; i < BENCHSIZE ; i++ , val += BENCHINC)
{
// cos(x) = sin(x + PI/2)
x = val + 1.57079632;
#ifdef WRAPPING
// wrap input angle to -PI..PI
if (x < -3.14159265)
while ( x < -3.14159265)
x += 6.28318531;
// else
if (x > 3.14159265)
while ( x > 3.14159265)
x -= 6.28318531;
#endif
if (x < 0)
cosinus = 1.27323954 * x + 0.405284735 * x * x;
else
cosinus = 1.27323954 * x - 0.405284735 * x * x;
}
EndTimer();
PrintTimer(title);
CheckUltraFastCosinus();
}
///////////////////////////////////////////////
// Fast but precises approximation functions //
///////////////////////////////////////////////
// Two-stage approximation of sin(x): the parabola
// 1.27323954*x -/+ 0.405284735*x*x is refined with
// .225 * (p*|p| - p) + p, where p is the parabola value.
//
// With WRAPPING defined the angle is first brought into -PI..PI.
//
// x: angle in radians.
// Returns the approximated sine; NaN when x is NaN or infinite.
inline PRECISION fast_precise_sin(PRECISION x)
{
#ifdef WRAPPING
    // wrap input angle to -PI..PI (angles already in range skip this block)
    if (x < -3.14159265 || x > 3.14159265)
    {
        // Stepping one period at a time never finishes for infinities or for
        // magnitudes where x +/- 2*PI rounds back to x, and takes very long
        // for large finite angles. Beyond roughly 16 periods the angle is
        // therefore reduced with a single remainder() call; nearer angles keep
        // the cheap loops.
        if (!(x > -100.0 && x < 100.0))
            x = remainder(static_cast<double>(x), 6.28318531);
        while (x < -3.14159265)
            x += 6.28318531;
        while (x > 3.14159265)
            x -= 6.28318531;
    }
#endif
    // first stage: parabola
    PRECISION result;
    if (x < 0)
        result = 1.27323954 * x + .405284735 * x * x;
    else
        result = 1.27323954 * x - 0.405284735 * x * x;

    // second stage: the refinement is the same for both signs of x and only
    // depends on the sign of the parabola value
    if (result < 0)
        result = .225 * (result *-result - result) + result;
    else
        result = .225 * (result * result - result) + result;
    return result;
}
// Two-stage approximation of cos(x), evaluated as the refined sine
// approximation of x + PI/2: the parabola 1.27323954*x -/+ 0.405284735*x*x
// is refined with .225 * (p*|p| - p) + p, where p is the parabola value.
//
// With WRAPPING defined the shifted angle is first brought into -PI..PI.
//
// x: angle in radians.
// Returns the approximated cosine; NaN when x is NaN or infinite.
inline PRECISION fast_precise_cos( PRECISION x)
{
    // cos(x) = sin(x + PI/2)
    x += 1.57079632;
#ifdef WRAPPING
    // wrap input angle to -PI..PI (angles already in range skip this block)
    if (x < -3.14159265 || x > 3.14159265)
    {
        // Stepping one period at a time never finishes for infinities or for
        // magnitudes where x +/- 2*PI rounds back to x, and takes very long
        // for large finite angles. Beyond roughly 16 periods the angle is
        // therefore reduced with a single remainder() call; nearer angles keep
        // the cheap loops.
        if (!(x > -100.0 && x < 100.0))
            x = remainder(static_cast<double>(x), 6.28318531);
        while (x < -3.14159265)
            x += 6.28318531;
        while (x > 3.14159265)
            x -= 6.28318531;
    }
#endif
    // first stage: parabola
    PRECISION result;
    if (x < 0)
        result = 1.27323954 * x + 0.405284735 * x * x;
    else
        result = 1.27323954 * x - 0.405284735 * x * x;

    // second stage: the refinement is the same for both signs of x and only
    // depends on the sign of the parabola value
    if (result < 0)
        result = .225 * (result *-result - result) + result;
    else
        result = .225 * (result * result - result) + result;
    return result;
}
////////////////////////////////////////////////////
// Tests fast but precise approximation functions //
////////////////////////////////////////////////////
void TestFastPreciseSinus(char *title)
{
int i;
PRECISION val, sinus;
PRECISION ref, diff, moy = 0, variance = 0, mini = 1.0f , maxi = -1.0f;
BeginTimer();
for( i = 0, val = -BENCHSTART; i < BENCHSIZE ; i++ , val += BENCHINC)
{
sinus = fast_precise_sin(val);
}
EndTimer();
PrintTimer(title);
for( i = 0, val = -BENCHSTART; i < BENCHSIZE ; i++ , val += BENCHINC)
{
ref = sin(val);
sinus = fast_precise_sin(val);
diff = sinus - ref;
moy += diff;
variance += diff*diff;
if ( diff < mini )
mini = diff;
if ( diff > maxi )
maxi = diff;
}
moy /= i;
variance = sqrt(variance / i);
//std::cout << "(min/moy/max=" << mini << "/" << moy << "/" << maxi << ")";
printf("(min/moy/variance/max)=(%1.9f/%1.9f/%1.9f/%1.9f)", mini, moy, variance, maxi);
}
void TestFastPreciseCosinus(char *title)
{
int i;
PRECISION val, cosinus;
PRECISION ref, diff, moy = 0, variance=0, mini = 1.0f , maxi = -1.0f;
BeginTimer();
for( i = 0, val = -BENCHSTART; i < BENCHSIZE ; i++ , val += BENCHINC)
{
cosinus = fast_precise_cos(val);
}
EndTimer();
PrintTimer(title);
for( i = 0, val = -BENCHSTART; i < BENCHSIZE ; i++ , val += BENCHINC)
{
ref = cos(val);
cosinus = fast_precise_cos(val);
diff = cosinus - ref;
moy += diff;
variance += diff*diff;
if ( diff < mini )
mini = diff;
if ( diff > maxi )
maxi = diff;
}
moy /= i;
variance = sqrt(variance / i);
//std::cout << "(min/moy/max=" << mini << "/" << moy << "/" << maxi << ")";
printf("(min/moy/variance/max)=(%1.9f/%1.9f/%1.9f/%1.9f)", mini, moy, variance, maxi);
}
///////////////////////////////////////////
// Tests ultra fast and precise versions //
///////////////////////////////////////////
void CheckUltraFastPreciseSinus()
{
int i;
PRECISION val, sinus, x;
PRECISION ref, diff, moy = 0, variance=0, mini = 1.0f , maxi = -1.0f;
for( i = 0, val = -BENCHSTART; i < BENCHSIZE ; i++ , val += BENCHINC)
{
x = val;
ref = sin(val);
#ifdef WRAPPING
// wrap input angle to -PI..PI
if ( x < -3.14159265 )
while ( x < -3.14159265)
x += 6.28318531 ;
// else
if ( x > 3.14159265)
while ( x > 3.14159265)
x -= 6.28318531;
#endif
//compute sine
if (x < 0)
{
sinus = 1.27323954 * x + .405284735 * x * x;
if (sinus < 0)
sinus = .225 * (sinus *-sinus - sinus) + sinus;
else
sinus = .225 * (sinus * sinus - sinus) + sinus;
}
else
{
sinus = 1.27323954 * x - 0.405284735 * x * x;
if (sinus < 0)
sinus = .225 * (sinus *-sinus - sinus) + sinus;
else
sinus = .225 * (sinus * sinus - sinus) + sinus;
}
// check diff
diff = sinus - ref;
moy += diff;
variance += diff * diff;
if ( diff < mini )
mini = diff;
if ( diff > maxi )
maxi = diff;
}
moy /= i;
variance = sqrt(variance / i);
//std::cout << "(min/moy/max=" << mini << "/" << moy << "/" << maxi << ")";
printf("(min/moy/variance/max=(%1.9f/%1.9f/%1.9f/%1.9f)", mini, moy, variance, maxi);
}
void TestUltraFastPreciseSinus(char *title)
{
int i;
PRECISION val, sinus, x;
BeginTimer();
for( i = 0, val = -BENCHSTART; i < BENCHSIZE ; i++ , val += BENCHINC)
{
x = val;
#ifdef WRAPPING
// wrap input angle to -PI..PI
if ( x < -3.14159265 )
while ( x < -3.14159265)
x += 6.28318531 ;
// else
if ( x > 3.14159265)
while ( x > 3.14159265)
x -= 6.28318531 ;
#endif
//compute sine
if (x < 0)
{
sinus = 1.27323954 * x + .405284735 * x * x;
if (sinus < 0)
sinus = .225 * (sinus *-sinus - sinus) + sinus;
else
sinus = .225 * (sinus * sinus - sinus) + sinus;
}
else
{
sinus = 1.27323954 * x - 0.405284735 * x * x;
if (sinus < 0)
sinus = .225 * (sinus *-sinus - sinus) + sinus;
else
sinus = .225 * (sinus * sinus - sinus) + sinus;
}
}
EndTimer();
PrintTimer(title);
CheckUltraFastPreciseSinus();
}
void CheckUltraFastPreciseCosinus()
{
int i;
PRECISION val, cosinus, x;
PRECISION ref, diff, moy = 0, variance = 0, mini = 1.0f, maxi = -1.0f;
for( i = 0, val = -BENCHSTART; i < BENCHSIZE ; i++ , val += BENCHINC)
{
// cos(x) = sin(x + PI/2)
x = val + 1.57079632;
ref = cos(val);
#ifdef WRAPPING
// wrap input angle to -PI..PI
if ( x < -3.14159265 )
while ( x < -3.14159265)
x += 6.28318531 ;
// else
if ( x > 3.14159265)
while ( x > 3.14159265)
x -= 6.28318531;
#endif
if (x < 0)
{
cosinus = 1.27323954 * x + 0.405284735 * x * x;
if (cosinus < 0)
cosinus = .225 * (cosinus *-cosinus - cosinus) + cosinus;
else
cosinus = .225 * (cosinus * cosinus - cosinus) + cosinus;
}
else
{
cosinus = 1.27323954 * x - 0.405284735 * x * x;
if (cosinus < 0)
cosinus = .225 * (cosinus *-cosinus - cosinus) + cosinus;
else
cosinus = .225 * (cosinus * cosinus - cosinus) + cosinus;
}
// check diff
diff = cosinus - ref;
moy += diff;
variance += diff * diff;
if ( diff < mini )
mini = diff;
if ( diff > maxi )
maxi = diff;
}
moy /= i;
variance = sqrt(variance / i);
//std::cout << "(min/moy/max=" << mini << "/" << moy << "/" << maxi << ")";
printf("(min/moy/variance/max)=(%1.9f/%1.9f/%1.9f/%1.9f)", mini, moy, variance, maxi);
}
void TestUltraFastPreciseCosinus(char *title)
{
int i;
PRECISION val, cosinus, x;
BeginTimer();
for( i = 0, val = -BENCHSTART; i < BENCHSIZE ; i++ , val += BENCHINC)
{
// cos(x) = sin(x + PI/2)
x = val + 1.57079632;
#ifdef WRAPPING
// wrap input angle to -PI..PI
if (x < -3.14159265)
while ( x < -3.14159265)
x += 6.28318531;
// else
if (x > 3.14159265)
while ( x > 3.14159265)
x -= 6.28318531;
#endif
if (x < 0)
{
cosinus = 1.27323954 * x + 0.405284735 * x * x;
if (cosinus < 0)
cosinus = .225 * (cosinus *-cosinus - cosinus) + cosinus;
else
cosinus = .225 * (cosinus * cosinus - cosinus) + cosinus;
}
else
{
cosinus = 1.27323954 * x - 0.405284735 * x * x;
if (cosinus < 0)
cosinus = .225 * (cosinus *-cosinus - cosinus) + cosinus;
else
cosinus = .225 * (cosinus * cosinus - cosinus) + cosinus;
}
}
EndTimer();
PrintTimer(title);
CheckUltraFastPreciseCosinus();
}
//////////
// main //
//////////
int main()
{
std::cout << "
Benchmark cos()/sin() optimization with " << BENCHSIZE << " iterations" << "
";
std::cout << "(from " << BENCHSTART << " radians to " << -BENCHSTART << " radians with a stepsize of " << BENCHINC << ")
";
TestStandardSinus( "Standard C/C++ sin() "); std::cout << "
";
TestStandardCosinus( "Standard C/C++ cos() "); std::cout << "
";
TestFastPreciseSinus( "inlined fast_precise_sin() "); std::cout << "
";
TestFastPreciseCosinus( "inlined fast_precise_cos() "); std::cout << "
";
TestUltraFastPreciseSinus( "UltraFastPreciseSinus() "); std::cout << "
";
TestUltraFastPreciseCosinus( "UltraFastPreciseCosinus() "); std::cout << "
";
TestFastSinus( "inlined fast_sin() "); std::cout << "
";
TestFastCosinus( "inlined fast_cos() "); std::cout << "
";
TestUltraFastSinus( "UltraFastSinus() "); std::cout << "
";
TestUltraFastCosinus( "UltraFastCosinus() "); std::cout << "
";
}
This gives the following output with the float version:
Benchmark cos()/sin() optimization with 10000000 iterations
(from -12.5664 radians to 12.5664 radians with a stepsize of 1.25664e-06)
Standard C/C++ sin() : 745 ms
Standard C/C++ cos() : 774 ms
inlined fast_precise_sin() : 598 ms (min/moy/variance/max)=(-0.001090974/0.000029710/0.000597704/0.001090437)
inlined fast_precise_cos() : 695 ms (min/moy/variance/max)=(-0.001091599/-0.000001502/0.000588869/0.001091063)
UltraFastPreciseSinus() : 520 ms (min/moy/variance/max=(-0.001090974/0.000029710/0.000597704/0.001090437)
UltraFastPreciseCosinus() : 702 ms (min/moy/variance/max)=(-0.001091599/-0.000001502/0.000588869/0.001091063)
inlined fast_sin() : 506 ms (min/moy/variance/max)=(-0.056010097/0.009407033/0.035580181/0.056009740)
inlined fast_cos() : 589 ms (min/moy/variance/max)=(-0.056010574/-0.000039861/0.035341624/0.056010306)
UltraFastSinus() : 382 ms (min/moy/variance/max=(-0.056010097/0.009407033/0.035580181/0.056009740)
UltraFastCosinus() : 461 ms (min/moy/variance/max)=(-0.056010574/-0.000039861/0.035341624/0.056010306)
And with the double version
(only change “#define PRECISION float” to “#define PRECISION double” in the source code):
Benchmark cos()/sin() optimization with 10000000 iterations
(from -12.5664 radians to 12.5664 radians with a stepsize of 1.25664e-06)
Standard C/C++ sin() : 737 ms
Standard C/C++ cos() : 765 ms
inlined fast_precise_sin() : 903 ms (min/moy/variance/max)=(-0.001090303/0.000000000/0.000596678/0.001090319)
inlined fast_precise_cos() : 956 ms (min/moy/variance/max)=(-0.001090311/-0.000000000/0.000596679/0.001090326)
UltraFastPreciseSinus() : 526 ms (min/moy/variance/max=(-0.001090303/0.000000000/0.000596678/0.001090319)
UltraFastPreciseCosinus() : 571 ms (min/moy/variance/max)=(-0.001090311/-0.000000000/0.000596679/0.001090326)
inlined fast_sin() : 839 ms (min/moy/variance/max)=(-0.056009604/0.000000000/0.035836168/0.056009589)
inlined fast_cos() : 801 ms (min/moy/variance/max)=(-0.056009610/-0.000000000/0.035836167/0.056009594)
UltraFastSinus() : 403 ms (min/moy/variance/max=(-0.056009604/0.000000000/0.035836168/0.056009589)
UltraFastCosinus() : 462 ms (min/moy/variance/max)=(-0.056009610/-0.000000000/0.035836167/0.056009594)
Only the UltraFast[Precise]Sinus/Cosinus() implementations are faster than the standard cos()/sin() functions in the double version
(the fast_sin/cos() and fast_precise_sin/cos() functions are slower in the double version)
[but they are all faster in the float version :)]
PS: I don’t really see an improvement with the double version compared to the float version => is this normal?
(the float version is faster, but on the other hand the double version doesn’t seem to be more precise …)