CUDA Interop for depth component

Hello,

CUDA interop OpenGL work for RGBA texture but does not work for depth value. As far as I know we need to map the depth component to a texture buffer then use CUDA interop for depth as a color texture. Below is how I did. Howver I can get only RGB but the depth always show 0. Can someone show me what is wrong or how can I use CUDA interop for depth bu

// Init device
cudaGLSetGLDevice(0);

...

// create a texture
deleteTexture( *tex_screen );
glGenTextures(1, tex_screen);
glBindTexture(GL_TEXTURE_2D, *tex_screen);
 // set basic parameters
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, WIDTH, HEIGHT, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
 glBindTexture(GL_TEXTURE_2D, 0);

// Create a texure for depth component.

glGenTextures(1, depthTexture);
// Bind the texture
glBindTexture(GL_TEXTURE_2D, *depthTexture);
// Set texture parameters
glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_DEPTH_TEXTURE_MODE, GL_INTENSITY);
// Create the texture in the GPU
glTexImage2D( GL_TEXTURE_2D, 0, GL_DEPTH_COMPONENT, WIDTH, HEIGHT, 0, GL_DEPTH_COMPONENT, GL_FLOAT, 0);
// Unbind the texture
glBindTexture(GL_TEXTURE_2D, 0);

// Create the one channel depth texture for use by cuda

glGenTextures(1, depthTexture_RGBA);
glBindTexture(GL_TEXTURE_2D, *depthTexture_RGBA);

glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP);
glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP);
glTexParameteri(GL_TEXTURE_2D, GL_DEPTH_TEXTURE_MODE, GL_INTENSITY);

glTexImage2D( GL_TEXTURE_2D, 0, GL_R32F, WIDTH, HEIGHT, 0, GL_RED, GL_FLOAT, 0);
glBindTexture(GL_TEXTURE_2D, 0);


//Create framebuffer

glGenFramebuffers(1, g_GLFramebuffer);
glBindFramebuffer(GL_FRAMEBUFFER, *g_GLFramebuffer);

 // attach images
glBindTexture(GL_TEXTURE_2D, tex_screen);
glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,  tex_screen, 0);

 // attach the texture to FBO depth attachment point
glBindTexture(GL_TEXTURE_2D, dep_screen);
glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT1,  depthTexture_RGBA, 0);

// Check to see if the frame buffer is valid
GLenum fboStatus = glCheckFramebufferStatus( GL_FRAMEBUFFER );
if ( fboStatus != GL_FRAMEBUFFER_COMPLETE )
{
    std::cerr << "ERROR: Incomplete framebuffer status." << std::endl;
}
// Unbind the frame buffer
glBindFramebuffer( GL_FRAMEBUFFER, 0 );

// register this texture with CUDA   

if(cudaGraphicsGLRegisterImage(&cuda_tex_screen_resource, tex_screen, GL_TEXTURE_2D, cudaGraphicsMapFlagsReadOnly) != cudaSuccess)
       fprintf(stderr, "Error in registering tex_screen with CUDA
");

    if(cudaGraphicsGLRegisterImage(&cuda_dep_screen_resource, depthTexture_RGBA, GL_TEXTURE_2D, cudaGraphicsMapFlagsReadOnly) != cudaSuccess)
        fprintf(stderr, "Error in registering dep_screen with CUDA
");
...

texture<uchar4, cudaTextureType2D, cudaReadModeElementType> texRef;
texture<float, cudaTextureType2D, cudaReadModeElementType> depRef;
...
cudaGraphicsMapResources(1, &srcTex);
cudaGraphicsMapResources(1, &srcDepth);

cudaGraphicsSubResourceGetMappedArray( &srcTexArray, srcTex, 0, 0);
cudaGraphicsSubResourceGetMappedArray( &srcDepthArray, srcDepth, 0, 0);


cudaBindTextureToArray( texRef, srcTexArray );   
cudaBindTextureToArray( depRef, srcDepthArray);
...
uchar4 color = tex2D(texRef, x, y);
float4 depth = tex2D(depRef, x, y); 

Hit the same issue years ago when I wanted to do a depth buffer crunch in OpenCL. This thread might have some info you can use.

Thank you, Dark Photon, is the solution to map depth value to color texure is " NV_copy_depth_to_color" parameter?
Hoang1127

Looks like that might work, if your depth buffer isn’t floating point or multisample, and if you limit yourself to NVidia. That’s a really, really old extension, so I’d time it to make sure it performs well for you.

You could also do ReadPixels()/TexSubImage2D() to/from a GPU PBO. That should be pretty fast, portable, and support fixed- or floating-point depth (but won’t work if multisampled). If you do this, you might avoid doing a format conversion (initially at least). For instance, if you’re using fixed-point 24/8 packed depth/stencil, initially use GL_UNSIGNED_INT_24_8_EXT; or if 32F depth buffer, use GL_FLOAT.

Or you can do what I did and just write depth to a R32F color channel/texture while rendering. That fast and works too.

Or you can have your own simple post-process pass that does depth-to-color conversion (frag shader: read depth, write color).

Lots of options here.

Happy New Year Dark Photon, I have spent the whole week to write the code and test for depth with R32F. But it still does not show the right depth image. Here below are the code which I have edited from a CUDA interop with texture color. Where can be the error?
The main.cpp:


#include <stdio.h>
#include <stdlib.h>
#include <cstdlib>
#include <iostream>
#include <string>
#include <math.h>
#define GLEW_STATIC // Specify GLEW_STATIC to use the static linked library (.lib) instead of the dynamic linked library (.dll) for GLEW
#include <GL/glew.h>
#include <glut.h>

// CUDA headers
#include <cuda_runtime_api.h>
#include <cuda_gl_interop.h>

#include "Postprocess.cu"

#define SRC_BUFFER  0
#define DST_BUFFER  1
#define SRC_DEPTH   2


int g_iGLUTWindowHandle = 0;
int g_iWindowPositionX = 0;
int g_iWindowPositionY = 0;
int g_iWindowWidth = 512;
int g_iWindowHeight = 512;

int g_iImageWidth = g_iWindowWidth;
int g_iImageHeight = g_iWindowHeight;

float g_fRotate[3] = { 0.0f, 0.0f, 0.0f };  // Rotation parameter for scene object.
bool g_bAnimate = true;                     // Animate the scene object
bool g_bPostProcess = true;                 // Enable/disable the postprocess effect.
float g_fBlurRadius = 2.0f;                 // Radius of 2D convolution blur performed in postprocess step.

GLuint g_GLFramebuffer = 0;                  // Frame buffer object for off-screen rendering.
GLuint g_GLColorAttachment0 = 0;            // Color texture to attach to frame buffer object.
GLuint g_GLDepthAttachment = 0;             // Depth buffer to attach to frame buffer object.
GLuint depthTexture	= 0;
GLuint g_GLPostprocessTexture = 0;          // This is where the result of the post-process effect will go.
                                            // This is also the final texture that will be blit to the back buffer for viewing.
 

// The CUDA Graphics Resource is used to map the OpenGL texture to a CUDA
// buffer that can be used in a CUDA kernel.
// We need 2 resource: One will be used to map to the color attachment of the
//   framebuffer and used read-only from the CUDA kernel (SRC_BUFFER), 
//   the second is used to write the postprocess effect to (DST_BUFFER).
cudaGraphicsResource_t g_CUDAGraphicsResource[3] = { 0,  0, 0};   

// Initialize OpenGL/GLUT
bool InitGL( int argc, char* argv[] );
// Initialize CUDA for OpenGL
void InitCUDA();
// Render a texture object to the current framebuffer
void DisplayImage( GLuint texture, unsigned int x, unsigned int y, unsigned int width, unsigned int height );


// Create a framebuffer object that is used for offscreen rendering.
void CreateFramebuffer( GLuint& framebuffer, GLuint colorAttachment0, GLuint depthAttachment , GLuint depthTexture );
void DeleteFramebuffer( GLuint& framebuffer );

void CreatePBO( GLuint& bufferID, size_t size );
void DeletePBO( GLuint& bufferID );

void CreateTexture( GLuint& texture, unsigned int width, unsigned int height );
void DeleteTexture( GLuint& texture );

void CreateDepthBuffer( GLuint& depthBuffer, unsigned int width, unsigned int height );
void DeleteDepthBuffer( GLuint& depthBuffer );

// Links a OpenGL texture object to a CUDA resource that can be used in the CUDA kernel.
void CreateCUDAResource( cudaGraphicsResource_t& cudaResource, GLuint GLtexture, cudaGraphicsMapFlags mapFlags );
void DeleteCUDAResource( cudaGraphicsResource_t& cudaResource );

void IdleGL();
void DisplayGL();
void KeyboardGL( unsigned char key, int x, int y );
void ReshapeGL( int w, int h );

void Cleanup( int errorCode, bool bExit = true )
{
    if ( g_iGLUTWindowHandle != 0 )
    {
        glutDestroyWindow( g_iGLUTWindowHandle );
        g_iGLUTWindowHandle = 0;
    }
    if ( bExit )
    {
        exit( errorCode );
    }
}

// Create a pixel buffer object
void CreatePBO( GLuint& bufferID, size_t size )
{
    // Make sure the buffer doesn't already exist
    DeletePBO( bufferID );

    glGenBuffers( 1, &bufferID );
    glBindBuffer( GL_PIXEL_UNPACK_BUFFER, bufferID );
    glBufferData( GL_PIXEL_UNPACK_BUFFER, size, NULL, GL_STREAM_DRAW );

    glBindBuffer( GL_PIXEL_UNPACK_BUFFER, 0 );
}

void DeletePBO(  GLuint& bufferID )
{
    if ( bufferID != 0 )
    {
        glDeleteBuffers( 1, &bufferID );
        bufferID = 0;
    }
}

// Create a texture resource for rendering to.
void CreateTexture( GLuint& texture, unsigned int width, unsigned int height )
{
    // Make sure we don't already have a texture defined here
    DeleteTexture( texture );

    glGenTextures( 1, &texture );
    glBindTexture( GL_TEXTURE_2D, texture );

    // set basic parameters
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);

    // Create texture data (4-component unsigned byte)
    glTexImage2D( GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL );

    // Unbind the texture
    glBindTexture( GL_TEXTURE_2D, 0 );
}

void DeleteTexture( GLuint& texture )
{
    if ( texture != 0 )
    {
        glDeleteTextures(1, &texture );
        texture = 0;
    }
}

void CreateDepthTexture(GLuint& txDepth, unsigned int width, unsigned int height)
{
	DeleteTexture(txDepth);
	
	glGenTextures(1, &txDepth);
	glBindTexture(GL_TEXTURE_2D, txDepth);
	
	glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
	glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);	
	
	glTexImage2D(GL_TEXTURE_2D, 0, GL_R32F, width, height, 0, GL_DEPTH_COMPONENT, GL_FLOAT, NULL);
	
	glBindTexture(GL_TEXTURE_2D, 0);
}


void CreateDepthBuffer( GLuint& depthBuffer, unsigned int width, unsigned int height )
{
    // Delete the existing depth buffer if there is one.
    DeleteDepthBuffer( depthBuffer );

    glGenRenderbuffers( 1, &depthBuffer );
    glBindRenderbuffer( GL_RENDERBUFFER, depthBuffer );

    glRenderbufferStorage( GL_RENDERBUFFER, GL_DEPTH_COMPONENT, width, height );

    // Unbind the depth buffer
    glBindRenderbuffer( GL_RENDERBUFFER, 0 );
        
}

void DeleteDepthBuffer( GLuint& depthBuffer )
{
    if ( depthBuffer != 0 )
    {
        glDeleteRenderbuffers( 1, &depthBuffer );
        depthBuffer = 0;
    }
}

void CreateFramebuffer( GLuint& framebuffer, GLuint colorAttachment0, GLuint depthAttachment , GLuint depthTexture )
{
    // Delete the existing framebuffer if it exists.
    DeleteFramebuffer( framebuffer );

    glGenFramebuffers( 1, &framebuffer );
    glBindFramebuffer( GL_FRAMEBUFFER, framebuffer );

    glFramebufferTexture2D( GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, colorAttachment0, 0 );
    glFramebufferTexture2D( GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT,  GL_TEXTURE_2D, depthTexture, 0);
   glFramebufferRenderbuffer( GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, depthAttachment );

    // Check to see if the frame buffer is valid
    GLenum fboStatus = glCheckFramebufferStatus( GL_FRAMEBUFFER );
    if ( fboStatus != GL_FRAMEBUFFER_COMPLETE )
    {
        std::cerr << "ERROR: Incomplete framebuffer status." << std::endl;
    }

    // Unbind the frame buffer
    glBindFramebuffer( GL_FRAMEBUFFER, 0 );
}

void DeleteFramebuffer( GLuint& framebuffer )
{
    if ( framebuffer != 0 )
    {
        glDeleteFramebuffers( 1, &framebuffer );
        framebuffer = 0;
    }
}

void CreateCUDAResource( cudaGraphicsResource_t& cudaResource, GLuint GLtexture, cudaGraphicsMapFlags mapFlags )
{
    // Map the GL texture resource with the CUDA resource
    cudaGraphicsGLRegisterImage( &cudaResource, GLtexture, GL_TEXTURE_2D, mapFlags );
}

void DeleteCUDAResource( cudaGraphicsResource_t& cudaResource )
{
    if ( cudaResource != 0 )
    {
        cudaGraphicsUnregisterResource( cudaResource );
        cudaResource = 0;
    }
}

int main( int argc, char* argv[] )
{
    glutInit(&argc, argv);
  
    // Init GLUT
    if ( !InitGL( argc, argv ) )
    {
        std::cerr << "ERROR: Failed to initialize OpenGL" << std::endl;
    }

    InitCUDA();
  
    // Startup our GL render loop
    glutMainLoop();

}


bool InitGL( int argc, char* argv[] )
{
    // Material property constants.
    const GLfloat fRed[] = { 1.0f, 0.1f, 0.1f, 1.0f };
    const GLfloat fWhite[] = { 1.0f, 1.0f, 1.0f, 1.0f };

    int iScreenWidth = glutGet(GLUT_SCREEN_WIDTH);
    int iScreenHeight = glutGet(GLUT_SCREEN_HEIGHT);

//    glutInit( &argc, argv );
    glutInitDisplayMode( GLUT_RGBA | GLUT_ALPHA | GLUT_DOUBLE | GLUT_DEPTH );
    glutInitWindowPosition( iScreenWidth / 2 - g_iWindowWidth / 2,
        iScreenHeight / 2 - g_iWindowHeight / 2 );
    glutInitWindowSize( g_iWindowWidth, g_iWindowHeight );

    g_iGLUTWindowHandle = glutCreateWindow( "Postprocess GL" );

    // Register GLUT callbacks
    glutDisplayFunc(DisplayGL);
    glutKeyboardFunc(KeyboardGL);
    glutReshapeFunc(ReshapeGL);
    glutIdleFunc(IdleGL);

    // Init GLEW
    glewInit();
    GLboolean gGLEW = glewIsSupported(
        "GL_VERSION_3_1 " 
        "GL_ARB_pixel_buffer_object "
        "GL_ARB_framebuffer_object "
        "GL_ARB_copy_buffer " 
        );

    int maxAttachemnts = 0;
    glGetIntegerv( GL_MAX_COLOR_ATTACHMENTS, &maxAttachemnts );

    if ( !gGLEW ) return false;

    glClearColor( 1.0f, 1.0f, 1.0f, 1.0f );
    glDisable( GL_DEPTH_TEST );

    // Setup the viewport
    glViewport( 0, 0, g_iWindowWidth, g_iWindowHeight );

    // Setup the projection matrix
    glMatrixMode( GL_PROJECTION );
    glLoadIdentity();

    gluPerspective( 60.0, (GLdouble)g_iWindowWidth/(GLdouble)g_iWindowHeight, 0.1, 1.0 );
    glPolygonMode( GL_FRONT_AND_BACK, GL_FILL );

    // Enable one light.
    glEnable( GL_LIGHT0 );
    glMaterialfv( GL_FRONT_AND_BACK, GL_DIFFUSE, fRed );
    glMaterialfv( GL_FRONT_AND_BACK, GL_SPECULAR, fWhite );
    glMaterialf( GL_FRONT_AND_BACK, GL_SHININESS, 60.0f );

    return true;
}

void InitCUDA()
{
    // We have to call cudaGLSetGLDevice if we want to use OpenGL interoperability.
    cudaGLSetGLDevice(0);
}


void IdleGL()
{
    if (g_bAnimate) 
    {
        g_fRotate[0] += 0.2; while(g_fRotate[0] > 360.0f) g_fRotate[0] -= 360.0f;   // Increment and clamp
        g_fRotate[1] += 0.6; while(g_fRotate[1] > 360.0f) g_fRotate[1] -= 360.0f;
        g_fRotate[2] += 1.0; while(g_fRotate[2] > 360.0f) g_fRotate[2] -= 360.0f;
    }

    glutPostRedisplay();

}

// Render the initial scene
void RenderScene()
{
    glClear( GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT );

    glMatrixMode( GL_PROJECTION );
    glLoadIdentity();
    gluPerspective( 60.0, (GLdouble)g_iWindowWidth / (GLdouble)g_iWindowHeight, 0.1, 10.0 );

    glMatrixMode( GL_MODELVIEW );
    glLoadIdentity();
    glTranslatef( 0.0f, 0.0f, -3.0f );

    glRotatef( g_fRotate[0], 1.0f, 0.0f, 0.0f );
    glRotatef( g_fRotate[1], 0.0f, 1.0f, 0.0f );
    glRotatef( g_fRotate[2], 0.0f, 0.0f, 1.0f );

    glViewport( 0, 0, g_iWindowWidth, g_iWindowHeight );

    glEnable( GL_LIGHTING );
    glEnable( GL_DEPTH_TEST );
    glDepthFunc( GL_LESS );

    glutSolidTeapot( 1.0 );

}

// Perform a post-process effect on the current framebuffer (back buffer)
void Postprocess()
{
    if ( g_bPostProcess )
    {
    	
        PostprocessCUDA( g_CUDAGraphicsResource[DST_BUFFER], g_CUDAGraphicsResource[SRC_BUFFER], g_CUDAGraphicsResource[SRC_DEPTH] , g_iImageWidth, g_iImageHeight );
    }
    else
    {
        // No postprocess effect. Just copy the contents of the color buffer
        // from the framebuffer (where the scene was rendered) to the 
        // post-process texture.  The postprocess texture will be rendered to the screen
        // in the next step.
        glBindFramebuffer( GL_FRAMEBUFFER, g_GLFramebuffer );
        glBindTexture( GL_TEXTURE_2D, g_GLPostprocessTexture );

        glCopyTexImage2D( GL_TEXTURE_2D, 0, GL_RGBA, 0, 0, g_iImageWidth, g_iImageHeight, 0 );

        glBindTexture( GL_TEXTURE_2D, 0 );
        glBindFramebuffer( GL_FRAMEBUFFER, 0 );
    }
}

void DisplayGL()
{
    // Bind the framebuffer that we want to use as the render target.
    glBindFramebuffer( GL_FRAMEBUFFER, g_GLFramebuffer );
    RenderScene();
    // Unbind the framebuffer so we render to the back buffer again.
    glBindFramebuffer( GL_FRAMEBUFFER, 0 );

    Postprocess();

    // Blit the image full-screen
    DisplayImage( g_GLPostprocessTexture, 0, 0, g_iWindowWidth, g_iWindowHeight );

    glutSwapBuffers();
    glutPostRedisplay();
}

void DisplayImage( GLuint texture, unsigned int x, unsigned int y, unsigned int width, unsigned int height )
{
    glBindTexture(GL_TEXTURE_2D, texture);
    glEnable(GL_TEXTURE_2D);
    glDisable(GL_DEPTH_TEST);
    glDisable(GL_LIGHTING);
    glTexEnvf(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);

    glMatrixMode(GL_PROJECTION);
    glPushMatrix();
    glLoadIdentity();
    glOrtho(-1.0, 1.0, -1.0, 1.0, -1.0, 1.0);

    glMatrixMode( GL_MODELVIEW);
    glLoadIdentity();

    glPushAttrib( GL_VIEWPORT_BIT );
    glViewport(x, y, width, height );

    glBegin(GL_QUADS);
    glTexCoord2f(0.0, 0.0); glVertex3f(-1.0, -1.0, 0.5);
    glTexCoord2f(1.0, 0.0); glVertex3f(1.0, -1.0, 0.5);
    glTexCoord2f(1.0, 1.0); glVertex3f(1.0, 1.0, 0.5);
    glTexCoord2f(0.0, 1.0); glVertex3f(-1.0, 1.0, 0.5);
    glEnd();

    glPopAttrib();

    glMatrixMode(GL_PROJECTION);
    glPopMatrix();

    glDisable(GL_TEXTURE_2D);
}

void KeyboardGL( unsigned char key, int x, int y )
{
    switch( key )
    {
    case '\033': // escape quits
    case 'Q':    // Q quits
    case 'q':    // q quits
        {
            // Cleanup up and quit
            Cleanup(0);
        }
        break;
    }

    glutPostRedisplay();
}

void ReshapeGL( int w, int h )
{
    h = std::max(h, 1);

    g_iWindowWidth = w;
    g_iWindowHeight = h;

    g_iImageWidth = w;
    g_iImageHeight = h;

    // Create a surface texture to render the scene to.
    CreateTexture( g_GLColorAttachment0, g_iImageWidth, g_iImageHeight );
   
    // Create a depth buffer for the frame buffer object.
    CreateDepthBuffer( g_GLDepthAttachment, g_iImageWidth, g_iImageHeight );
 
    CreateDepthTexture( depthTexture, g_iImageWidth, g_iImageHeight);
   
    // Attach the color and depth textures to the framebuffer.
    CreateFramebuffer( g_GLFramebuffer, g_GLColorAttachment0, g_GLDepthAttachment , depthTexture);

    // Create a texture to render the post-process effect to.
    CreateTexture( g_GLPostprocessTexture, g_iImageWidth, g_iImageHeight );

    // Map the color attachment to a CUDA graphics resource so we can read it in a CUDA a kernel.
    CreateCUDAResource( g_CUDAGraphicsResource[SRC_BUFFER], g_GLColorAttachment0, cudaGraphicsMapFlagsReadOnly );
    CreateCUDAResource( g_CUDAGraphicsResource[SRC_DEPTH], depthTexture, cudaGraphicsMapFlagsReadOnly );
    // Map the post-process texture to the CUDA resource so it can be 
    // written in the kernel.
    CreateCUDAResource( g_CUDAGraphicsResource[DST_BUFFER], g_GLPostprocessTexture, cudaGraphicsMapFlagsWriteDiscard );

    glutPostRedisplay();
}

And here is postProcess.cu:


#include <cuda_runtime_api.h>
#include "Postprocess.h"

#define BLOCK_SIZE 16     // block size

texture<uchar4, cudaTextureType2D, cudaReadModeElementType> texRef;
texture<float, cudaTextureType2D, cudaReadModeElementType> depRef;

__global__ void PostprocessKernel( uchar4* dst, unsigned int imgWidth, unsigned int imgHeight )
{
    unsigned int tx = threadIdx.x;
    unsigned int ty = threadIdx.y;
    unsigned int bw = blockDim.x;
    unsigned int bh = blockDim.y;
    // Non-normalized U, V coordinates of input texture for current thread.
    unsigned int u = ( bw * blockIdx.x ) + tx;
    unsigned int v = ( bh * blockIdx.y ) + ty;

    // Early-out if we are beyond the texture coordinates for our texture.
    if ( u > imgWidth || v > imgHeight ) return;

    unsigned int index = ( v * imgWidth ) + u;
 //   uchar4 color = tex2D( texRef, u, v );
    float depth = tex2D(depRef, u, v);
 //   dst[index] = make_uchar4( color.x, color.y, color.z, 1);
    
    dst[index] = make_uchar4( depth*255, depth*255, depth*255, 1);
   }

uchar4* g_dstBuffer = NULL;
size_t g_BufferSize = 0; 

void PostprocessCUDA( cudaGraphicsResource_t& dst, cudaGraphicsResource_t& src, cudaGraphicsResource_t& srcDepth,  unsigned int width, unsigned int height)
{

    cudaGraphicsResource_t resources[2] = { srcDepth, dst };

    // Map the resources so they can be used in the kernel.
   cudaGraphicsMapResources( 2, resources ) ;

	
 //   cudaArray* srcArray;
    cudaArray* dstArray;   
    cudaArray* srcDepthArray;   

    // Get a device pointer to the OpenGL buffers
 //  cudaGraphicsSubResourceGetMappedArray( &srcArray, src, 0, 0 ) ;
    cudaGraphicsSubResourceGetMappedArray( &srcDepthArray, srcDepth, 0, 0 ) ;
    cudaGraphicsSubResourceGetMappedArray( &dstArray, dst, 0, 0 ) ;
 
    // Map the source texture to a texture reference.
  //   cudaBindTextureToArray( texRef, srcArray );
     cudaBindTextureToArray( depRef, srcDepthArray );
     
     // Destination buffer to store the result of the postprocess effect.
    size_t bufferSize = width * height * sizeof(uchar4);
    if ( g_BufferSize != bufferSize )
    {
        if ( g_dstBuffer != NULL )
        {
            cudaFree( g_dstBuffer );
        }
        // Only re-allocate the global memory buffer if the screen size changes, 
        // or it has never been allocated before (g_BufferSize is still 0)
        g_BufferSize = bufferSize;
        cudaMalloc( &g_dstBuffer, g_BufferSize );
    }

    // Compute the grid size
    size_t blocksW = (size_t)ceilf( width / (float)BLOCK_SIZE );
    size_t blocksH = (size_t)ceilf( height / (float)BLOCK_SIZE );
    dim3 gridDim( blocksW, blocksH, 1 );
    dim3 blockDim( BLOCK_SIZE, BLOCK_SIZE, 1 );

    PostprocessKernel<<< gridDim, blockDim >>>( g_dstBuffer, width, height );

    // Copy the destination back to the source array
    cudaMemcpyToArray( dstArray, 0, 0, g_dstBuffer, bufferSize, cudaMemcpyDeviceToDevice  );

    // Unbind the texture reference
 //   cudaUnbindTexture( texRef);
    cudaUnbindTexture( depRef);
    
    // Unmap the resources again so the texture can be rendered in OpenGL
     cudaGraphicsUnmapResources( 2, resources ) ;
}

And here is CMakeLists.txt


cmake_minimum_required (VERSION 2.8)
project (PostprocessLinux)

find_package(CUDA)

include_directories(${PROJECT_SOURCE_DIR}/include)
link_directories(${PROJECT_SOURCE_DIR}/lib)

cuda_include_directories("/usr/local/cuda-6.5/include")
link_directories("/usr/local/cuda-6.5/lib64")

cuda_compile(Postprocess_O, Postprocess.cu)
cuda_compile(Main_O main.cu)

cuda_add_executable (test ${Postprocess_O} ${Main_O} )

target_link_libraries(test "-lglut" "-lGL" "-lGLU" "-lGLEW" "-lpthread" "-lcudart")

INSTALL(TARGETS test RUNTIME DESTINATION bin LIBRARY DESTINATION lib)


To build it, you need make a build directory and cmake … and then make

You might check for GL errors. This doesn’t look right.

First, R32F is a color format. You’re creating a R32F color texture, but then requesting to populate it with a DEPTH_COMPONENT format. I don’t think that’s valid. Since you provided NULL for the pointer it’s probably academic though.

Then also you’ve got two bindings for the DEPTH_ATTACHMENT for the FBO. You need one.

Finally, AFAIK you can’t use an R32F color texture as a depth attachment. You need a DEPTH_COMPONENT renderbuffer or texture here.

You either need to rasterize out a 2nd color buffer with the depth value (this is your R32F texture), or after rendering to your depthAttachment, you need to generate the R32F texture/renderbuffer from that via one of the methods mentioned.

Thanks you for the suggestion. They are corrected all now. But still shows black screen when draw depth texture. When drawing with color texture (dst[index] = make_uchar4( color.x, color.y, color.z, 1):wink: then It shows correctly. Now the R32F is the second color texture. And It is mapped to read in kernel. Where can be wrong. Here is the code.

main.cpp


#include <stdio.h>
#include <stdlib.h>
#include <cstdlib>
#include <iostream>
#include <string>
#include <math.h>
#define GLEW_STATIC // Specify GLEW_STATIC to use the static linked library (.lib) instead of the dynamic linked library (.dll) for GLEW
#include <GL/glew.h>
#include <glut.h>

// CUDA headers
#include <cuda_runtime_api.h>
#include <cuda_gl_interop.h>

#include "Postprocess.cu"

#define SRC_BUFFER  0
#define DST_BUFFER  1
#define SRC_DEPTH   2


int g_iGLUTWindowHandle = 0;
int g_iWindowPositionX = 0;
int g_iWindowPositionY = 0;
int g_iWindowWidth = 512;
int g_iWindowHeight = 512;

int g_iImageWidth = g_iWindowWidth;
int g_iImageHeight = g_iWindowHeight;

float g_fRotate[3] = { 0.0f, 0.0f, 0.0f };  // Rotation parameter for scene object.

float g_fBlurRadius = 2.0f;                 // Radius of 2D convolution blur performed in postprocess step.

GLuint g_GLFramebuffer = 0;                  // Frame buffer object for off-screen rendering.
GLuint g_GLColorAttachment0 = 0;            // Color texture to attach to frame buffer object.
GLuint g_GLDepthAttachment = 0;             // Depth buffer to attach to frame buffer object.
GLuint depthTexture	= 0;
GLuint g_GLPostprocessTexture = 0;          // This is where the result of the post-process effect will go.
                                            // This is also the final texture that will be blit to the back buffer for viewing.


// The CUDA Graphics Resource is used to map the OpenGL texture to a CUDA
// buffer that can be used in a CUDA kernel.
// We need 2 resource: One will be used to map to the color attachment of the
//   framebuffer and used read-only from the CUDA kernel (SRC_BUFFER), 
//   the second is used to write the postprocess effect to (DST_BUFFER).
cudaGraphicsResource_t g_CUDAGraphicsResource[3] = { 0,  0, 0};   

// Initialize OpenGL/GLUT
bool InitGL( int argc, char* argv[] );
// Initialize CUDA for OpenGL
void InitCUDA();
// Render a texture object to the current framebuffer
void DisplayImage( GLuint texture, unsigned int x, unsigned int y, unsigned int width, unsigned int height );


// Create a framebuffer object that is used for offscreen rendering.
void CreateFramebuffer( GLuint& framebuffer, GLuint colorAttachment0, GLuint depthAttachment , GLuint depthTexture );
void DeleteFramebuffer( GLuint& framebuffer );

void CreatePBO( GLuint& bufferID, size_t size );
void DeletePBO( GLuint& bufferID );

void CreateTexture( GLuint& texture, unsigned int width, unsigned int height );
void DeleteTexture( GLuint& texture );

void CreateDepthBuffer( GLuint& depthBuffer, unsigned int width, unsigned int height );
void DeleteDepthBuffer( GLuint& depthBuffer );

// Links a OpenGL texture object to a CUDA resource that can be used in the CUDA kernel.
void CreateCUDAResource( cudaGraphicsResource_t& cudaResource, GLuint GLtexture, cudaGraphicsMapFlags mapFlags );
void DeleteCUDAResource( cudaGraphicsResource_t& cudaResource );

void IdleGL();
void DisplayGL();
void KeyboardGL( unsigned char key, int x, int y );
void ReshapeGL( int w, int h );

void Cleanup( int errorCode, bool bExit = true )
{
    if ( g_iGLUTWindowHandle != 0 )
    {
        glutDestroyWindow( g_iGLUTWindowHandle );
        g_iGLUTWindowHandle = 0;
    }
    if ( bExit )
    {
        exit( errorCode );
    }
}

// Create a pixel buffer object
void CreatePBO( GLuint& bufferID, size_t size )
{
    // Make sure the buffer doesn't already exist
    DeletePBO( bufferID );

    glGenBuffers( 1, &bufferID );
    glBindBuffer( GL_PIXEL_UNPACK_BUFFER, bufferID );
    glBufferData( GL_PIXEL_UNPACK_BUFFER, size, NULL, GL_STREAM_DRAW );

    glBindBuffer( GL_PIXEL_UNPACK_BUFFER, 0 );
}

void DeletePBO(  GLuint& bufferID )
{
    if ( bufferID != 0 )
    {
        glDeleteBuffers( 1, &bufferID );
        bufferID = 0;
    }
}

// Create a texture resource for rendering to.
void CreateTexture( GLuint& texture, unsigned int width, unsigned int height )
{
    // Make sure we don't already have a texture defined here
    DeleteTexture( texture );

    glGenTextures( 1, &texture );
    glBindTexture( GL_TEXTURE_2D, texture );

    // set basic parameters
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);

    // Create texture data (4-component unsigned byte)
    glTexImage2D( GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL );

    // Unbind the texture
    glBindTexture( GL_TEXTURE_2D, 0 );
}

void DeleteTexture( GLuint& texture )
{
    if ( texture != 0 )
    {
        glDeleteTextures(1, &texture );
        texture = 0;
    }
}

void CreateDepthTexture(GLuint& txDepth, unsigned int width, unsigned int height)
{
	DeleteTexture(txDepth);
	
	glGenTextures(1, &txDepth);
	glBindTexture(GL_TEXTURE_2D, txDepth);
	
	glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
	glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);	
	
	glTexImage2D(GL_TEXTURE_2D, 0, GL_R32F, width, height, 0, GL_DEPTH_COMPONENT, GL_FLOAT, NULL);
	
	glBindTexture(GL_TEXTURE_2D, 0);
}


void CreateDepthBuffer( GLuint& depthBuffer, unsigned int width, unsigned int height )
{
    // Delete the existing depth buffer if there is one.
    DeleteDepthBuffer( depthBuffer );

    glGenRenderbuffers( 1, &depthBuffer );
    glBindRenderbuffer( GL_RENDERBUFFER, depthBuffer );

    glRenderbufferStorage( GL_RENDERBUFFER, GL_DEPTH_COMPONENT, width, height );

    // Unbind the depth buffer
    glBindRenderbuffer( GL_RENDERBUFFER, 0 );
        
}

void DeleteDepthBuffer( GLuint& depthBuffer )
{
    if ( depthBuffer != 0 )
    {
        glDeleteRenderbuffers( 1, &depthBuffer );
        depthBuffer = 0;
    }
}

void CreateFramebuffer( GLuint& framebuffer, GLuint colorAttachment0, GLuint depthAttachment , GLuint depthTexture )
{
    // Delete the existing framebuffer if it exists.
    DeleteFramebuffer( framebuffer );

    glGenFramebuffers( 1, &framebuffer );
    glBindFramebuffer( GL_FRAMEBUFFER, framebuffer );

     glFramebufferTexture2D( GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, colorAttachment0, 0 );
    glFramebufferTexture2D( GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT1,  GL_TEXTURE_2D, depthTexture, 0);   
    glFramebufferTexture2D( GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT,  GL_TEXTURE_2D, depthAttachment, 0);

    // Check to see if the frame buffer is valid
    GLenum fboStatus = glCheckFramebufferStatus( GL_FRAMEBUFFER );
    if ( fboStatus != GL_FRAMEBUFFER_COMPLETE )
    {
        std::cerr << "ERROR: Incomplete framebuffer status." << std::endl;
    }

    // Unbind the frame buffer
    glBindFramebuffer( GL_FRAMEBUFFER, 0 );
}

void DeleteFramebuffer( GLuint& framebuffer )
{
    if ( framebuffer != 0 )
    {
        glDeleteFramebuffers( 1, &framebuffer );
        framebuffer = 0;
    }
}

void CreateCUDAResource( cudaGraphicsResource_t& cudaResource, GLuint GLtexture, cudaGraphicsMapFlags mapFlags )
{
    // Map the GL texture resource with the CUDA resource
    cudaGraphicsGLRegisterImage( &cudaResource, GLtexture, GL_TEXTURE_2D, mapFlags );
}

void DeleteCUDAResource( cudaGraphicsResource_t& cudaResource )
{
    if ( cudaResource != 0 )
    {
        cudaGraphicsUnregisterResource( cudaResource );
        cudaResource = 0;
    }
}

int main( int argc, char* argv[] )
{
    glutInit(&argc, argv);
  
    // Init GLUT
    if ( !InitGL( argc, argv ) )
    {
        std::cerr << "ERROR: Failed to initialize OpenGL" << std::endl;
    }

    InitCUDA();
  
    // Startup our GL render loop
    glutMainLoop();

}


bool InitGL( int argc, char* argv[] )
{
    // Material property constants.
    const GLfloat fRed[] = { 1.0f, 0.1f, 0.1f, 1.0f };
    const GLfloat fWhite[] = { 1.0f, 1.0f, 1.0f, 1.0f };

    int iScreenWidth = glutGet(GLUT_SCREEN_WIDTH);
    int iScreenHeight = glutGet(GLUT_SCREEN_HEIGHT);

//    glutInit( &argc, argv );
    glutInitDisplayMode( GLUT_RGBA | GLUT_ALPHA | GLUT_DOUBLE | GLUT_DEPTH );
    glutInitWindowPosition( iScreenWidth / 2 - g_iWindowWidth / 2,
        iScreenHeight / 2 - g_iWindowHeight / 2 );
    glutInitWindowSize( g_iWindowWidth, g_iWindowHeight );

    g_iGLUTWindowHandle = glutCreateWindow( "Postprocess GL" );

    // Register GLUT callbacks
    glutDisplayFunc(DisplayGL);
    glutKeyboardFunc(KeyboardGL);
    glutReshapeFunc(ReshapeGL);
    glutIdleFunc(IdleGL);

    // Init GLEW
    glewInit();
    GLboolean gGLEW = glewIsSupported(
        "GL_VERSION_3_1 " 
        "GL_ARB_pixel_buffer_object "
        "GL_ARB_framebuffer_object "
        "GL_ARB_copy_buffer " 
        );

    int maxAttachemnts = 0;
    glGetIntegerv( GL_MAX_COLOR_ATTACHMENTS, &maxAttachemnts );

    if ( !gGLEW ) return false;

    glClearColor( 1.0f, 1.0f, 1.0f, 1.0f );
 //   glDisable( GL_DEPTH_TEST );

    // Setup the viewport
    glViewport( 0, 0, g_iWindowWidth, g_iWindowHeight );

    // Setup the projection matrix
    glMatrixMode( GL_PROJECTION );
    glLoadIdentity();

    gluPerspective( 60.0, (GLdouble)g_iWindowWidth/(GLdouble)g_iWindowHeight, 0.1, 1.0 );
    glPolygonMode( GL_FRONT_AND_BACK, GL_FILL );

    // Enable one light.
    glEnable( GL_LIGHT0 );
    glMaterialfv( GL_FRONT_AND_BACK, GL_DIFFUSE, fRed );
    glMaterialfv( GL_FRONT_AND_BACK, GL_SPECULAR, fWhite );
    glMaterialf( GL_FRONT_AND_BACK, GL_SHININESS, 60.0f );

    return true;
}

void InitCUDA()
{
    // We have to call cudaGLSetGLDevice if we want to use OpenGL interoperability.
    cudaGLSetGLDevice(0);
}


void IdleGL()
{

    glutPostRedisplay();

}

// Render the initial scene
void RenderScene()
{
    glClear( GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT );

    glMatrixMode( GL_PROJECTION );
    glLoadIdentity();
    gluPerspective( 60.0, (GLdouble)g_iWindowWidth / (GLdouble)g_iWindowHeight, 0.1, 10.0 );

    glMatrixMode( GL_MODELVIEW );
    glLoadIdentity();
    glTranslatef( 0.0f, 0.0f, -3.0f );

    glRotatef( g_fRotate[0], 1.0f, 0.0f, 0.0f );
    glRotatef( g_fRotate[1], 0.0f, 1.0f, 0.0f );
    glRotatef( g_fRotate[2], 0.0f, 0.0f, 1.0f );

    glViewport( 0, 0, g_iWindowWidth, g_iWindowHeight );

    glEnable( GL_LIGHTING );
    glEnable( GL_DEPTH_TEST );
    glDepthFunc( GL_LESS );

    glutSolidTeapot( 1.0 );

}

// Perform a post-process effect on the current framebuffer (back buffer)
void Postprocess()
{
      	
        PostprocessCUDA( g_CUDAGraphicsResource[DST_BUFFER], g_CUDAGraphicsResource[SRC_BUFFER], g_CUDAGraphicsResource[SRC_DEPTH] , g_iImageWidth, g_iImageHeight );
   
}

void DisplayGL()
{
    // Bind the framebuffer that we want to use as the render target.
    glBindFramebuffer( GL_FRAMEBUFFER, g_GLFramebuffer );
    RenderScene();
    // Unbind the framebuffer so we render to the back buffer again.
    glBindFramebuffer( GL_FRAMEBUFFER, 0 );

 //   unsigned char * readData = (unsigned char* )glMapBuffer(GL_PIXEL_PACK_BUFFER, GL_READ_ONLY);

	// Test reading
 //   glBindTexture(GL_TEXTURE_2D, depthTexture );

//    glActiveTexture(GL_TEXTURE0);
  //  glGetTexImage(GL_TEXTURE_2D, 0, GL_DEPTH_COMPONENT, GL_FLOAT, readData);


 //    glBindTexture(GL_TEXTURE_2D, 0);

    Postprocess();

    // Blit the image full-screen
    DisplayImage( g_GLPostprocessTexture, 0, 0, g_iWindowWidth, g_iWindowHeight );


    glutSwapBuffers();
    glutPostRedisplay();

}

void DisplayImage( GLuint texture, unsigned int x, unsigned int y, unsigned int width, unsigned int height )
{
    glBindTexture(GL_TEXTURE_2D, texture);
    glEnable(GL_TEXTURE_2D);
    glDisable(GL_DEPTH_TEST);
    glDisable(GL_LIGHTING);
    glTexEnvf(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);

    glMatrixMode(GL_PROJECTION);
    glPushMatrix();
    glLoadIdentity();
    glOrtho(-1.0, 1.0, -1.0, 1.0, -1.0, 1.0);

    glMatrixMode( GL_MODELVIEW);
    glLoadIdentity();

    glPushAttrib( GL_VIEWPORT_BIT );
    glViewport(x, y, width, height );

    glBegin(GL_QUADS);
    glTexCoord2f(0.0, 0.0); glVertex3f(-1.0, -1.0, 0.5);
    glTexCoord2f(1.0, 0.0); glVertex3f(1.0, -1.0, 0.5);
    glTexCoord2f(1.0, 1.0); glVertex3f(1.0, 1.0, 0.5);
    glTexCoord2f(0.0, 1.0); glVertex3f(-1.0, 1.0, 0.5);
    glEnd();

    glPopAttrib();

    glMatrixMode(GL_PROJECTION);
    glPopMatrix();

    glDisable(GL_TEXTURE_2D);
}

void KeyboardGL( unsigned char key, int x, int y )
{
    switch( key )
    {
    case '\033': // escape quits
    case 'Q':    // Q quits
    case 'q':    // q quits
        {
            // Cleanup up and quit
            Cleanup(0);
        }
        break;
    }

    glutPostRedisplay();
}

void ReshapeGL( int w, int h )
{
    h = std::max(h, 1);

    g_iWindowWidth = w;
    g_iWindowHeight = h;

    g_iImageWidth = w;
    g_iImageHeight = h;

    // Create a surface texture to render the scene to.
    CreateTexture( g_GLColorAttachment0, g_iImageWidth, g_iImageHeight );
   CreateDepthTexture( depthTexture, g_iImageWidth, g_iImageHeight);
  
    // Create a depth buffer for the frame buffer object.
//    CreateDepthBuffer( g_GLDepthAttachment, g_iImageWidth, g_iImageHeight );
 
  
    // Attach the color and depth textures to the framebuffer.
    CreateFramebuffer( g_GLFramebuffer, g_GLColorAttachment0, g_GLDepthAttachment , depthTexture);

    // Create a texture to render the post-process effect to.
    CreateTexture( g_GLPostprocessTexture, g_iImageWidth, g_iImageHeight );

    // Map the color attachment to a CUDA graphics resource so we can read it in a CUDA a kernel.
    CreateCUDAResource( g_CUDAGraphicsResource[SRC_BUFFER], g_GLColorAttachment0, cudaGraphicsMapFlagsReadOnly );
    CreateCUDAResource( g_CUDAGraphicsResource[SRC_DEPTH], depthTexture, cudaGraphicsMapFlagsReadOnly );
    // Map the post-process texture to the CUDA resource so it can be 
    // written in the kernel.
    CreateCUDAResource( g_CUDAGraphicsResource[DST_BUFFER], g_GLPostprocessTexture, cudaGraphicsMapFlagsWriteDiscard );

    glutPostRedisplay();
} 

Here is the Postprocess.cu:


#include <cuda_runtime_api.h>
#include "Postprocess.h"

#define BLOCK_SIZE 16     // block size

texture<uchar4, cudaTextureType2D, cudaReadModeElementType> texRef;
texture<float, cudaTextureType2D, cudaReadModeElementType> depRef;

__global__ void PostprocessKernel( uchar4* dst, unsigned int imgWidth, unsigned int imgHeight )
{
    unsigned int tx = threadIdx.x;
    unsigned int ty = threadIdx.y;
    unsigned int bw = blockDim.x;
    unsigned int bh = blockDim.y;
    // Non-normalized U, V coordinates of input texture for current thread.
    unsigned int u = ( bw * blockIdx.x ) + tx;
    unsigned int v = ( bh * blockIdx.y ) + ty;

    // Early-out if we are beyond the texture coordinates for our texture.
    if ( u > imgWidth || v > imgHeight ) return;

    unsigned int index = ( v * imgWidth ) + u;
    uchar4 color = tex2D( texRef, u, v );
    float depth = tex2D(depRef, u, v);

 //   dst[index] = make_uchar4( color.x, color.y, color.z, 1);
    
   dst[index] = make_uchar4( depth*255, depth*255, depth*255, 1);
   }

uchar4* g_dstBuffer = NULL;
size_t g_BufferSize = 0; 

void PostprocessCUDA( cudaGraphicsResource_t& dst, cudaGraphicsResource_t& src, cudaGraphicsResource_t& srcDepth,  unsigned int width, unsigned int height)
{

  //  cudaGraphicsResource_t resources[3] = { src, srcDepth, dst };

    // Map the resources so they can be used in the kernel.
   cudaGraphicsMapResources( 1, &src ) ;
   cudaGraphicsMapResources( 1, &srcDepth ) ;
   cudaGraphicsMapResources(1, &dst ) ;

	
   cudaArray* srcArray;
   cudaArray* dstArray;   
   cudaArray* srcDepthArray;   

    // Get a device pointer to the OpenGL buffers
   cudaGraphicsSubResourceGetMappedArray( &srcArray, src, 0, 0 ) ;
   cudaGraphicsSubResourceGetMappedArray( &srcDepthArray, srcDepth, 0, 0 ) ;
   cudaGraphicsSubResourceGetMappedArray( &dstArray, dst, 0, 0 ) ;
 
    // Map the source texture to a texture reference.
     cudaBindTextureToArray( texRef, srcArray );
     cudaBindTextureToArray( depRef, srcDepthArray );
     
     // Destination buffer to store the result of the postprocess effect.
    size_t bufferSize = width * height * sizeof(uchar4);
    if ( g_BufferSize != bufferSize )
    {
        if ( g_dstBuffer != NULL )
        {
            cudaFree( g_dstBuffer );
        }
        // Only re-allocate the global memory buffer if the screen size changes, 
        // or it has never been allocated before (g_BufferSize is still 0)
        g_BufferSize = bufferSize;
        cudaMalloc( &g_dstBuffer, g_BufferSize );
    }

    // Compute the grid size
    size_t blocksW = (size_t)ceilf( width / (float)BLOCK_SIZE );
    size_t blocksH = (size_t)ceilf( height / (float)BLOCK_SIZE );
    dim3 gridDim( blocksW, blocksH, 1 );
    dim3 blockDim( BLOCK_SIZE, BLOCK_SIZE, 1 );

    PostprocessKernel<<< gridDim, blockDim >>>( g_dstBuffer, width, height );

    // Copy the destination back to the source array
    cudaMemcpyToArray( dstArray, 0, 0, g_dstBuffer, bufferSize, cudaMemcpyDeviceToDevice  );

    // Unbind the texture reference
    cudaUnbindTexture( texRef);
    cudaUnbindTexture( depRef);
    
    // Unmap the resources again so the texture can be rendered in OpenGL
     cudaGraphicsUnmapResources( 1, &src ) ;
    cudaGraphicsUnmapResources( 1, &srcDepth ) ;
    cudaGraphicsUnmapResources( 1, &dst ) ;

}

And the CMakelists.txt:

cmake_minimum_required (VERSION 2.8)
project (PostprocessLinux)

find_package(CUDA)

include_directories(${PROJECT_SOURCE_DIR}/include)
link_directories(${PROJECT_SOURCE_DIR}/lib)

cuda_include_directories("/usr/local/cuda-6.5/include")
link_directories("/usr/local/cuda-6.5/lib64")

cuda_compile(Postprocess_O, Postprocess.cu)
cuda_compile(Main_O main.cu)

cuda_add_executable (test ${Postprocess_O} ${Main_O} )

target_link_libraries(test "-lglut" "-lGL" "-lGLU" "-lGLEW" "-lpthread" "-lcudart")

INSTALL(TARGETS test RUNTIME DESTINATION bin LIBRARY DESTINATION lib)

Suggestion: get your R32F color “depth” texture correct and leave CUDA on the sidelines for now. Render the R32F color texture containing depth to the screen with OpenGL in a separate pass. Linearize depth values so that you can see something useful (otherwise most of the values will be clustered up around near 1.0. Assuming a perspective projection, use something like this:


float ComputeEyeSpaceDepth(in float depth) // Assumes perspective projection used to render "depth"
{
  return near * far / ((depth * (far - near)) - far);
}

float depth_win = texelFetch( r32f_tex, ivec2( gl_FragCoord.xy ), 0 ).r;
float depth_eye = ComputeEyeSpaceDepth( depth_win );
float color     = (-depth_eye-near) / (far - near);
gl_FragData[0] = vec4( vec3( color ), 1 );

With shader, It shows the depth value in render. But in CUDA kernel it still show emply (black).

[ATTACH=CONFIG]860[/ATTACH]

Can you suggest me anything.

Sounds like you’ve resolved your OpenGL issues then. You’ll probably get the best response to your CUDA questions on the CUDA forums at https://devtalk.nvidia.com/.

Thanks Dark Photon, I did post as your suggestion. But still not get any reply yet.
https://devtalk.nvidia.com/default/topic/797440/cuda-programming-and-performance/cuda-interop-for-depth-component-work-around-/

There is another question in framebuffer. How can I map the depth texture buffer to a R32F texture buffer.

Here is my depth texture

glGenTextures(1, &depthTexture);
	// Bind the texture
	glBindTexture(GL_TEXTURE_2D, depthTexture);
	// Set texture parameters
	glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
	glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
	glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
	glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
	glTexParameteri(GL_TEXTURE_2D, GL_DEPTH_TEXTURE_MODE, GL_INTENSITY);
	// Create the texture in the GPU
	glTexImage2D( GL_TEXTURE_2D, 0, GL_DEPTH_COMPONENT, WIDTH, HEIGHT, 0, GL_DEPTH_COMPONENT, GL_FLOAT, NULL);
        glBindTexture(GL_TEXTURE_2D, 0);
	

The R32F texture buffer

glGenTextures(1, &depthTexture_R);
	glBindTexture(GL_TEXTURE_2D, depthTexture_R);

	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
	glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP);
	glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP);
	glTexParameteri(GL_TEXTURE_2D, GL_DEPTH_TEXTURE_MODE, GL_INTENSITY);

	glTexImage2D( GL_TEXTURE_2D, 0, GL_R32F, WIDTH, HEIGHT, 0, GL_RED, GL_FLOAT, 0);
	
	glBindTexture(GL_TEXTURE_2D, 0);

In framebuffer:

glGenFramebuffers(1, &gFramebuffer);
	glBindFramebuffer(GL_FRAMEBUFFER, gFramebuffer);
    // attach images
	glBindTexture(GL_TEXTURE_2D, tex_screen);
	glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,  tex_screen, 0);
     // attach the texture to FBO depth attachment point
	glBindTexture(GL_TEXTURE_2D, depthTexture);
	glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT1,  depthTexture_R, 0);
 // Unbind the frame buffer
    glBindFramebuffer( GL_FRAMEBUFFER, 0 );

It does not show the depth value correct.

How to copy depth texture to R32F texture. Following is the code. It show the depth incorrect.

Depth Texure:

// Create a texure
	glGenTextures(1, &depthTexture);
	// Bind the texture
	glBindTexture(GL_TEXTURE_2D, depthTexture);
	// Set texture parameters
	glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
	glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
	glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
	glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
	glTexParameteri(GL_TEXTURE_2D, GL_DEPTH_TEXTURE_MODE, GL_INTENSITY);
	// Create the texture in the GPU
	glTexImage2D( GL_TEXTURE_2D, 0, GL_DEPTH_COMPONENT, WIDTH, HEIGHT, 0, GL_DEPTH_COMPONENT, GL_FLOAT, NULL);
        glBindTexture(GL_TEXTURE_2D, 0);

R32F texure:

	glGenTextures(1, &depthTexture_R);
	glBindTexture(GL_TEXTURE_2D, depthTexture_R);

	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
	glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP);
	glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP);
	glTexParameteri(GL_TEXTURE_2D, GL_DEPTH_TEXTURE_MODE, GL_INTENSITY);

	glTexImage2D( GL_TEXTURE_2D, 0, GL_R32F, WIDTH, HEIGHT, 0, GL_RED, GL_FLOAT, 0);
	
	glBindTexture(GL_TEXTURE_2D, 0);
	

The frame buffer mapping:


glGenFramebuffers(1, &gFramebuffer);
	glBindFramebuffer(GL_FRAMEBUFFER, gFramebuffer);
    // attach images
	glBindTexture(GL_TEXTURE_2D, tex_screen);
	glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,  tex_screen, 0);
     // attach the texture to FBO depth attachment point
	glBindTexture(GL_TEXTURE_2D, depthTexture);
	glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT1,  depthTexture_R, 0);
 // Unbind the frame buffer
    glBindFramebuffer( GL_FRAMEBUFFER, 0 );

Thanks in advance.