Compute info
------------
Internal error: assembly compile error for compute shader at offset 108543:
-- error message --
line 2094, column 6: error: BAR not allowed inside flow control blocks.
line 2108, column 6: error: BAR not allowed inside flow control blocks.
-- internal assembly text --
!!NVcp5.0
# Compiler-generated compute program (see the "# cgc version" line below); this is
# the text the driver rejected with the two "BAR not allowed inside flow control
# blocks" errors reported above the listing.
OPTION NV_shader_storage_buffer;
OPTION NV_shader_atomic_float;
GROUP_SIZE 32 32;
# cgc version 3.1.0001, build date Aug 8 2012
# command line args:
#vendor NVIDIA Corporation
#version 3.1.0.1
#profile gp5cp
#program main
#semantic As : SHARED
#semantic Bs : SHARED
#semantic oBuffer : SBO_BUFFER[2]
#semantic iBuffer : SBO_BUFFER[0]
#semantic oBuffer : SBO_BUFFER[1]
#var float Bs[0][0] : : shared_mem[8188] : -1 : 1
# lots more
#var float Bs[31][31] : : shared_mem[8188] : -1 : 1
#var uint3 gl_GlobalInvocationID : $vin.GBLID : GBLID[4] : -1 : 1
#var uint3 gl_WorkGroupID : $vin.CTAID : CTAID[2] : -1 : 1
#var float C[0] : : sbo_buffer[2][0] : -1 : 1
#var float A[0] : : sbo_buffer[0][0] : -1 : 1
#var float B[0] : : sbo_buffer[1][0] : -1 : 1
# 8192 bytes of shared memory are declared; the code below uses byte offsets
# [0, 4096) for one array and [4096, 8192) for the other.
SHARED_MEMORY 8192;
SHARED shared_mem[] = { program.sharedmem };
# Per the #var table above: sbo_buf0 backs A, sbo_buf1 backs B, sbo_buf2 backs C.
STORAGE sbo_buf0[] = { program.storage[0] };
STORAGE sbo_buf1[] = { program.storage[1] };
STORAGE sbo_buf2[] = { program.storage[2] };
# Register roles as used below:
#   R0.y  float accumulator (zeroed here, updated by MAD.F, stored by STB at the end)
#   R0.z  running element base into sbo_buf0, starts at groupid.y * 32768, +32 per outer pass
#   R0.w  running element base into sbo_buf1, starts at groupid.x * 32,    +32768 per outer pass
#   R0.x, R1.x, R1.y, R1.z  scratch for address arithmetic and loaded values
# Presumably a 32x32-tiled matrix product C = A * B with a row stride of 1024
# elements -- inferred from the constants and the A/B/C/As/Bs names; confirm
# against the GLSL source.
TEMP R0, R1;
MOV.F R0.y, {0, 0, 0, 0}.x;
MUL.U R0.z, invocation.groupid.y, {32768, 0, 0, 0}.x;
MUL.U R0.w, invocation.groupid.x, {32, 0, 0, 0}.x;
# ---- Outer loop: 32 passes. Everything up to the matching ENDREP is inside
# ---- flow control, including both BAR instructions.
REP.S {32, 0, 0, 0};
# Element indices for this pass:
#   sbo_buf1 index -> R1.x = globalid.y * 1024 + R0.w + globalid.x
#   sbo_buf0 index -> R0.x = globalid.y * 1024 + R0.z + globalid.x
MAD.U R1.x, invocation.globalid.y, {1024, 0, 0, 0}, R0.w;
MAD.U R0.x, invocation.globalid.y, {1024, 0, 0, 0}, R0.z;
ADD.U R0.x, invocation.globalid, R0;
# Multiply by 4 to turn the element index into a byte offset (F32 elements).
MUL.S R1.y, R0.x, {4, 0, 0, 0}.x;
ADD.U R1.x, invocation.globalid, R1;
# Shared-memory byte offset R1.y = globalid.y * 128 + globalid.x * 4
# (128 bytes = one row of 32 floats).
# NOTE(review): this offset is built from invocation.globalid, not a per-group
# local id. It stays below 4096 only while both globalid.x and globalid.y are
# < 32; verify whether the GLSL source meant gl_LocalInvocationID here.
MUL.S R0.x, invocation.globalid.y, {128, 0, 0, 0};
MOV.U R1.z, R1.y;
MAD.S R1.y, invocation.globalid.x, {4, 0, 0, 0}.x, R0.x;
# Copy one float from sbo_buf0 into the lower shared region.
LDB.F32 R0.x, sbo_buf0[R1.z];
MOV.U R1.y, R1;
STS.F32 R0, shared_mem[R1.y];
# Copy one float from sbo_buf1 into the upper shared region (+4096 bytes).
MUL.S R1.x, R1, {4, 0, 0, 0};
MOV.U R0.x, R1;
LDB.F32 R0.x, sbo_buf1[R0.x];
STS.F32 R0, shared_mem[R1.y + 4096];
# First barrier: sits between the shared-memory stores above and the loads in
# the inner loop. It is inside the outer REP, which is what the assembler
# rejects ("BAR not allowed inside flow control blocks").
BAR ;
# ---- Inner loop: 32 iterations, R1.y is the counter k (0..31).
MOV.S R1.y, {0, 0, 0, 0}.x;
REP.S {32, 0, 0, 0};
# Upper-region offset (before +4096): R1.z = k * 128 + globalid.x * 4
MUL.S R1.x, R1.y, {128, 0, 0, 0};
MAD.S R1.x, invocation.globalid, {4, 0, 0, 0}, R1;
# Lower-region offset: R1.x = globalid.y * 128 + k * 4
MUL.S R0.x, invocation.globalid.y, {128, 0, 0, 0};
MAD.S R0.x, R1.y, {4, 0, 0, 0}, R0;
MOV.U R1.z, R1.x;
MOV.U R1.x, R0;
LDS.F32 R0.x, shared_mem[R1.z + 4096];
LDS.F32 R1.x, shared_mem[R1.x];
# accumulator += lower[globalid.y][k] * upper[k][globalid.x]
MAD.F R0.y, R1.x, R0.x, R0;
ADD.S R1.y, R1, {1, 0, 0, 0}.x;
ENDREP;
# Second barrier: sits after the inner loop's shared-memory loads and before
# the next outer pass overwrites shared memory. Also inside the outer REP; it
# is 14 lines after the first BAR, matching the 2094 -> 2108 spacing of the
# two reported errors.
BAR ;
# Advance both running bases for the next pass.
ADD.U R0.z, R0, {32, 0, 0, 0}.x;
ADD.U R0.w, R0, {32768, 0, 0, 0}.x;
ENDREP;
# ---- Write-back: element index =
#      groupid.x * 32 + groupid.y * 32768 + globalid.y * 1024 + globalid.x,
# scaled by 4 to a byte offset, then the accumulator is stored to sbo_buf2.
MUL.U R0.x, invocation.groupid, {32, 0, 0, 0};
MAD.U R0.x, invocation.groupid.y, {32768, 0, 0, 0}, R0;
MAD.U R0.x, invocation.globalid.y, {1024, 0, 0, 0}, R0;
ADD.U R0.x, R0, invocation.globalid;
MUL.S R0.x, R0, {4, 0, 0, 0};
MOV.U R0.x, R0;
STB.F32 R0.y, sbo_buf2[R0.x];
END
# 44 instructions, 2 R-regs