CUDA with OpenGL

MasterKiten · October 18, 2011, 2:07pm

Hi, I'm using CUDA as my base API and trying to interoperate it with OpenGL to make my display faster. I'm currently using 9800GT and 9500GT NVIDIA GPUs on the Windows platform.

I have compiled my program, and when it runs it reports an unspecified device error at

cutilSafeCall(cudaGraphicsGLRegisterBuffer(cuda_pixel_resource, *pixel_buffer, cudaGraphicsMapFlagsWriteDiscard));

which is an error message captured by cutilSafeCall. I read some pages regarding OpenGL, and someone answered that it might be a problem related to glewInit(); but when I placed it after glutInit() and the related calls, I got an access violation. The following is the code without changing the position of glewInit().

// includes, system

#include <stdlib.h>
#include <stdio.h>
#include <string.h>

// includes, GL
#include <GL/glew.h>
//#include <GL/freeglut.h>
#include <GL/glut.h>

#include <cuda.h>
#include <cudaGL.h>

// includes, CUDA <-> C++ interop
#include <cuda_runtime.h>
#include <cutil_inline.h>
#include <cutil_gl_inline.h>
#include <cutil_gl_error.h>
#include <cuda_gl_interop.h>

//define variables

// variables into cuda
const unsigned int window_width = 1000;
const unsigned int window_height = 1024;

float4 *d_vbo_buffer;

// pbo variables
GLuint pbo;
struct cudaGraphicsResource *cuda_pbo_resource;

extern “C”
void launch_kernel(int* sum, float* original_data, int* frame_num, cuComplex* d, float* k_resamp, float* dc_subtracted, float* resample, float4 *ptr);

// Creates the OpenGL buffer object that CUDA writes into and registers it
// with CUDA for graphics interop. When pixel_buffer is NULL no GL buffer is
// created; a plain device allocation of the same size is stored in
// d_vbo_buffer instead.
//
//   pixel_buffer        - out: receives the GL buffer name; may be NULL.
//   cuda_pixel_resource - out: receives the CUDA graphics resource handle of
//                         the registered buffer (written only when
//                         pixel_buffer is non-NULL).
//
// Must be called with a current OpenGL context and after glewInit(), since
// the buffer-object entry points are loaded by GLEW. CUDA failures are
// reported through cutilSafeCall.
void createVBO(GLuint* pixel_buffer, struct cudaGraphicsResource **cuda_pixel_resource)
{
    // XDIM * YDIM elements of four floats each (one float4 per element).
    const size_t size = XDIM * YDIM * 4 * sizeof(float);

    if (!pixel_buffer) {
        cutilSafeCall(cudaMalloc((void **)&d_vbo_buffer, size));
        return;
    }

    // create buffer object and allocate (uninitialized) storage for it
    glGenBuffers(1, pixel_buffer);
    glBindBuffer(GL_ARRAY_BUFFER, *pixel_buffer);
    glBufferData(GL_ARRAY_BUFFER, size, 0, GL_DYNAMIC_DRAW);
    glBindBuffer(GL_ARRAY_BUFFER, 0);

    // GL entry points return void, so they cannot be wrapped in
    // cutilSafeCall / CUT_CHECK_ERROR; query the GL error state instead.
    CUT_CHECK_ERROR_GL();

    // register buffer with CUDA; the *Register* flag enum is the one this
    // call documents (the Map flag of the same name belongs to
    // cudaGraphicsResourceSetMapFlags).
    cutilSafeCall(cudaGraphicsGLRegisterBuffer(cuda_pixel_resource, *pixel_buffer, cudaGraphicsRegisterFlagsWriteDiscard));
}

void display() {

// Map buffer object for writing from CUDA 
float4* pix_buff; 
cudaGraphicsMapResources(1, &cuda_pbo_resource, 0); 
size_t num_bytes; 
cudaGraphicsResourceGetMappedPointer((void**)&pix_buff, &num_bytes, cuda_pbo_resource);

// Execute kernel 
// will execute by calling kernel.cu file
if(pbo)
launch_kernel(dev_sum, dev_original, dev_frame, d_in, dev_k_resampledspacing, dc_subtracted, dev_resamp, pix_buff);
else launch_kernel(dev_sum, dev_original, dev_frame, d_in, dev_k_resampledspacing, dc_subtracted, dev_resamp, d_vbo_buffer);
// Unmap buffer object 
cudaGraphicsUnmapResources(1, &cuda_pbo_resource, 0);

// copy to Open_gl texture
// Render from buffer object 
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); 
glBindBuffer(GL_ARRAY_BUFFER, pbo); 
glVertexPointer(4, GL_FLOAT, 0, 0); 
glEnableClientState(GL_VERTEX_ARRAY); 
glDrawArrays(GL_POINTS, 0, XDIM * YDIM); 
glDisableClientState(GL_VERTEX_ARRAY); 

// Swap buffers 
glutSwapBuffers(); 
glutPostRedisplay();

}

void deleteVBO() {
cudaGraphicsUnregisterResource(cuda_pbo_resource);
glDeleteBuffers(1, &pbo);
}

int main(int argc, char *argv[]) {
// Pre-Calculation
readData();
convertor(); //convert values into single float
calc_resample_coefficients();

//Cuda Memory Allocation
int frame = 0, sum =0;

cudaMalloc((void**)&dev_frame, sizeof(int));
cudaMalloc((void**)&dev_sum, sizeof(int));
cudaMalloc((void**)&dev_original, sizeof(float)*FRAMES*XDIM*YDIM);
cudaMalloc((void**)&d_in,sizeof(cuComplex)*YDIM*XDIM);
cudaMalloc((void**)&dev_resamp, sizeof(float)*LINE_LENGTH*2);
cudaMalloc((void**)&dev_k_resampledspacing, sizeof(float));
cudaMalloc((void**)&dc_subtracted, sizeof(float)*XDIM*YDIM);
	
cudaMemcpy(dev_k_resampledspacing, &k_resampledspacing, sizeof(float), cudaMemcpyHostToDevice);
cudaMemcpy(dev_resamp, resamp_1D, sizeof(float)*LINE_LENGTH*2, cudaMemcpyHostToDevice);
cudaMemcpy(dev_original, original_cpu, sizeof(float)*FRAMES*XDIM*YDIM, cudaMemcpyHostToDevice);
cudaMemcpy(dev_frame, &frame, sizeof(int), cudaMemcpyHostToDevice);
cudaMemcpy(dev_sum, &sum, sizeof(int), cudaMemcpyHostToDevice);

// Explicitly set device 
cudaGLSetGLDevice(2);

glutInit(&argc, argv);
glutInitDisplayMode(GLUT_RGBA | GLUT_DOUBLE);
glutInitWindowSize(window_width, window_height);
glutCreateWindow("Cuda GL Interop (VBO)");
glutDisplayFunc(display);

glewInit();

/*
// default initialization
glClearColor(0.0, 0.0, 0.0, 1.0);
glDisable(GL_DEPTH_TEST);

// viewport
glViewport(0, 0, window_width, window_height);

// projection
glMatrixMode(GL_PROJECTION);
glLoadIdentity();
gluPerspective(60.0, (GLfloat)XDIM / (GLfloat) YDIM, 0.1, 10.0);

CUT_CHECK_ERROR_GL();
*/

CUcontext *pCtx = NULL;

cuInit(0);
cuCtxCreate(pCtx, CU_CTX_MAP_HOST, 0);
cuGLInit();
cuGLCtxCreate(pCtx, CU_CTX_MAP_HOST, 0);	

// Create buffer object and register it with CUDA 
createVBO(&pbo, &cuda_pbo_resource);

// Loop 
glutMainLoop(); 

deleteVBO();

cudaThreadExit();

}

The access violation when I change the position of glewInit() happens at glGenBuffers(1, pixel_buffer) in createVBO. As a beginner with OpenGL, I'm confused about the cause of these errors and am making no progress debugging. Can someone help me? Thank you.

Dark_Photon · October 19, 2011, 5:43am

While you might find someone here that can help, you’re probably more likely to get the help you need on the NVidia CUDA forums:

http://forums.nvidia.com/index.php?showforum=62

While you wait, wind your code back to where you have no problems and it works well. Then add things in incrementally (binary search the problem; or linear search it – whatever you prefer).

mobeen · October 19, 2011, 8:17am

I think you should reorder your calls in this order and see if you still get the error.


int main(int argc, char *argv[]) { 
   //set GL context first.
   glutInit(&argc, argv);
   glutInitDisplayMode(GLUT_RGBA | GLUT_DOUBLE);
   glutInitWindowSize(window_width, window_height);
   glutCreateWindow("Cuda GL Interop (VBO)");
   glutDisplayFunc(display);

   glewInit();
   
   // Pre-Calculation
   readData();
   convertor(); //convert values into single float
   calc_resample_coefficients();

  
   int frame = 0, sum =0;
   CUcontext *pCtx = NULL;

   cuInit(0);
   cuCtxCreate(pCtx, CU_CTX_MAP_HOST, 0); 

   //Cuda Memory Allocation
   cudaMalloc((void**)&dev_frame, sizeof(int));
   cudaMalloc((void**)&dev_sum, sizeof(int));
   cudaMalloc((void**)&dev_original, sizeof(float)*FRAMES*XDIM*YDIM);
   cudaMalloc((void**)&d_in,sizeof(cuComplex)*YDIM*XDIM);
   cudaMalloc((void**)&dev_resamp, sizeof(float)*LINE_LENGTH*2);
   cudaMalloc((void**)&dev_k_resampledspacing, sizeof(float));
   cudaMalloc((void**)&dc_subtracted, sizeof(float)*XDIM*YDIM);
   cudaMemcpy(dev_k_resampledspacing, &k_resampledspacing, sizeof(float), cudaMemcpyHostToDevice);
   cudaMemcpy(dev_resamp, resamp_1D, sizeof(float)*LINE_LENGTH*2, cudaMemcpyHostToDevice);
   cudaMemcpy(dev_original, original_cpu, sizeof(float)*FRAMES*XDIM*YDIM, cudaMemcpyHostToDevice);
   cudaMemcpy(dev_frame, &frame, sizeof(int),      cudaMemcpyHostToDevice);
   cudaMemcpy(dev_sum, &sum, sizeof(int), cudaMemcpyHostToDevice);

   // Explicitly set device
   cudaGLSetGLDevice(2);


cuGLInit();
cuGLCtxCreate(pCtx, CU_CTX_MAP_HOST, 0);

// Create buffer object and register it with CUDA
createVBO(&pbo, &cuda_pbo_resource);

// Loop
glutMainLoop();

deleteVBO();

cudaThreadExit();
}

See if this helps.

MasterKiten · October 20, 2011, 2:03pm

Well, after I changed the order and placed CUT_CHECK_ERROR at

CUT_CHECK_ERROR(glGenBuffers(1, pixel_buffer));
CUT_CHECK_ERROR(glBindBuffer(GL_ARRAY_BUFFER, *pixel_buffer));

I get

1>.\Resampling_GL.cpp(126) : error C2664: ‘fprintf’ : cannot convert parameter 3 from ‘void’ to ‘…’
1> Expressions of type void cannot be converted to other types
1>.\Resampling_GL.cpp(126) : error C2664: ‘fprintf’ : cannot convert parameter 3 from ‘void’ to ‘…’
1> Expressions of type void cannot be converted to other types
1>.\Resampling_GL.cpp(127) : error C2664: ‘fprintf’ : cannot convert parameter 3 from ‘void’ to ‘…’
1> Expressions of type void cannot be converted to other types
1>.\Resampling_GL.cpp(127) : error C2664: ‘fprintf’ : cannot convert parameter 3 from ‘void’ to ‘…’
1> Expressions of type void cannot be converted to other types

where lines 126 and 127 are the places where glGenBuffers(1, pixel_buffer) and glBindBuffer(GL_ARRAY_BUFFER, *pixel_buffer) are called, respectively. I'm not sure how there could be a parameter 3 or why a type conversion problem is happening. Is this supposed to happen? Thank you.

MasterKiten · October 20, 2011, 2:15pm

Oh, I'm sorry, it was because CUT_CHECK_ERROR cannot be used with functions that return void. If I get rid of it, it says

Unhandled exception at 0x68699d2f (msvcr90d.dll) in Resampling_GL.exe: 0xC0000005: Access violation reading location 0x000027d9.

I'm not sure how this error arises, since I'm ignoring LIBCMT.lib, so there should be no conflict between default libraries… Is this something to do with my settings? Can anyone tell me how to fix this?

mobeen · October 20, 2011, 8:41pm

Unhandled exception at 0x68699d2f (msvcr90d.dll) in Resampling_GL.exe: 0xC0000005: Access violation reading location 0x000027d9.

This tells me that there is a conflict between the runtime libraries. What is the setting for Project -> Properties -> Configuration Properties -> C/C++ -> Code Generation? I usually set this to
Multithreaded DLL (/MD) for Release and
Multithreaded Debug DLL (/MDd) for Debug
And ignore LIBCMT.lib.
What are your settings?

MasterKiten · October 25, 2011, 8:29am

I'm actually using
Multithreaded Debug DLL (/MDd) for Debug now
and ignoring LIBCMT.lib, so I wonder why I'm getting such errors.

arafathosyas · October 17, 2019, 4:01pm

How can I run this code on Ubuntu? What is the command? My PC has CUDA installed and an NVIDIA graphics card.
Moreover, how should I save the code? As code.cu or code.cpp?