Only glClear(..) color is displayed, nothing else rendered (CUDA/OpenGL interop)

I have a WinForms application with a panel (500x500 pixels) that I want to render something in. At this point I am just trying to fill it in with a specific color. I want to use OpenGL/CUDA interop to do this.

I got the panel configured to be the region to render stuff in, however when I run my code, the panel just gets filled with the glClear(…) color, and nothing assigned by the kernel is displayed. It sort of worked this morning (inconsistently), and in my attempt to sort out the SwapBuffers() mess, I think I screwed it up.

Here is the pixel format initialization for OpenGL. It seems to work fine, I have the two buffers as I expected, and the context is correct:

static  PIXELFORMATDESCRIPTOR pfd=              
{
    sizeof(PIXELFORMATDESCRIPTOR),              // Size Of This Pixel Format Descriptor
    1,                                          // Version Number
    PFD_DRAW_TO_WINDOW |                        // Format Must Support Window
    PFD_SUPPORT_OPENGL |                        // Format Must Support OpenGL
    PFD_DOUBLEBUFFER,                           // Must Support Double Buffering
    PFD_TYPE_RGBA,                              // Request An RGBA Format
    16,                                         // Select Our Color Depth
    0, 0, 0, 0, 0, 0,                           // Color Bits Ignored
    0,                                          // No Alpha Buffer
    0,                                          // Shift Bit Ignored
    0,                                          // No Accumulation Buffer
    0, 0, 0, 0,                                 // Accumulation Bits Ignored
    16,                                         // 16Bit Z-Buffer (Depth Buffer) 
    0,                                          // No Stencil Buffer
    0,                                          // No Auxiliary Buffer
    PFD_MAIN_PLANE,                             // Main Drawing Layer
    0,                                          // Reserved
    0, 0, 0                                     // Layer Masks Ignored
};

GLint  iPixelFormat; 

// get the device context's best, available pixel format match 
if((iPixelFormat = ChoosePixelFormat(hdc, &pfd)) == 0)
{
    MessageBox::Show("ChoosePixelFormat Failed");
    return 0;
}

// make that match the device context's current pixel format 
if(SetPixelFormat(hdc, iPixelFormat, &pfd) == FALSE)
{
    MessageBox::Show("SetPixelFormat Failed");
    return 0;
}

if((m_hglrc = wglCreateContext(m_hDC)) == NULL)
{
    MessageBox::Show("wglCreateContext Failed");
    return 0;
}

if((wglMakeCurrent(m_hDC, m_hglrc)) == NULL)
{
    MessageBox::Show("wglMakeCurrent Failed");
    return 0;
}   

After this is done, I set up the ViewPort as such:

glViewport(0,0,iWidth,iHeight);                     // Reset The Current Viewport
glMatrixMode(GL_MODELVIEW);                         // Select The Modelview Matrix
glLoadIdentity();                                   // Reset The Modelview Matrix
glEnable(GL_DEPTH_TEST);

Then I set up the clear color and do a clear:

glClearColor(1.0f, 0.0f, 0.0f, 1.0f);
glClear(GL_COLOR_BUFFER_BIT| GL_DEPTH_BUFFER_BIT);  

Now I set up the CUDA/OpenGL interop:

cudaDeviceProp prop; int dev;
memset(&prop, 0, sizeof(cudaDeviceProp));
prop.major = 1; prop.minor = 0;

checkCudaErrors(cudaChooseDevice(&dev, &prop));
checkCudaErrors(cudaGLSetGLDevice(dev));

glBindBuffer    = (PFNGLBINDBUFFERARBPROC)GET_PROC_ADDRESS("glBindBuffer");
glDeleteBuffers = (PFNGLDELETEBUFFERSARBPROC)GET_PROC_ADDRESS("glDeleteBuffers");
glGenBuffers    = (PFNGLGENBUFFERSARBPROC)GET_PROC_ADDRESS("glGenBuffers");
glBufferData    = (PFNGLBUFFERDATAARBPROC)GET_PROC_ADDRESS("glBufferData");

GLuint bufferID;
cudaGraphicsResource * resourceID;

glGenBuffers(1, &bufferID);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, bufferID);
glBufferData(GL_PIXEL_UNPACK_BUFFER_ARB, fWidth*fHeight*4, NULL, GL_DYNAMIC_DRAW_ARB);

checkCudaErrors(cudaGraphicsGLRegisterBuffer( &resourceID, bufferID, cudaGraphicsMapFlagsNone ));

Now I try to call my kernel (which just paints each pixel a specific color) and have that displayed.

uchar4* devPtr;
size_t size;

// First clear the back buffer:
glClearColor(1.0f, 0.5f, 0.0f, 0.0f); // orange
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);

checkCudaErrors(cudaGraphicsMapResources(1, &resourceID, NULL));
checkCudaErrors(cudaGraphicsResourceGetMappedPointer((void**)&devPtr, &size, resourceID));

animate(devPtr); // This will call the kernel and do a sync (see later)

checkCudaErrors(cudaGraphicsUnmapResources(1, &resourceID, NULL));

// Swap buffers to bring back buffer forward:
SwapBuffers(m_hDC);

At this point I expect to see the kernel colors on the screen, but no! I see orange, which is the clear color that I just set.

If you are curious, Here is the call to the kernel:

void animate(uchar4* dispPtr)
{
    checkCudaErrors(cudaDeviceSynchronize());
    animKernel<<<blocks, threads>>>(dispPtr, envdim);;
    checkCudaErrors(cudaDeviceSynchronize());
}

Here envdim is just the dimensions (so 500x500). The kernel itself:

__global__ void animKernel(uchar4 *optr, dim3 matdim)
{
    int x = threadIdx.x + blockIdx.x * blockDim.x;
    int y = threadIdx.y + blockIdx.y * blockDim.y;
    int offset = x + y * matdim.x;

    if (x < matdim.x && y < matdim.y)
    {
        // BLACK:
        optr[offset].x = 0; optr[offset].y = 0; optr[offset].z = 0;
    }
}

Things I’ve done:

  1. The value returned by cudaGraphicsResourceGetMappedPointer’s size is 1000000, which corresponds to the 500x500 matrix of uchar4, so that’s good.

  2. Each kernel printed the value and location that it was writing to, and that seemed ok.

  3. Played with the alpha value for the clear color, but that doesn’t seem to do anything (yet?)

So I guess I’m missing something, but I’m going kind of crazy looking for it. Any advice? Help?