Part of the Khronos Group
OpenGL.org

The Industry's Foundation for High Performance Graphics

from games to virtual reality, mobile phones to supercomputers

Page 1 of 2 12 LastLast
Results 1 to 10 of 11

Thread: CUDA Interop for depth component

  1. #1
    Junior Member Newbie
    Join Date
    Dec 2014
    Posts
    6

    CUDA Interop for depth component

    Hello,

    CUDA interop OpenGL work for RGBA texture but does not work for depth value. As far as I know we need to map the depth component to a texture buffer then use CUDA interop for depth as a color texture. Below is how I did. Howver I can get only RGB but the depth always show 0. Can someone show me what is wrong or how can I use CUDA interop for depth bu

    Code :
    // Init device
    cudaGLSetGLDevice(0);
     
    ...
     
    // create a texture
    deleteTexture( *tex_screen );
    glGenTextures(1, tex_screen);
    glBindTexture(GL_TEXTURE_2D, *tex_screen);
     // set basic parameters
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
     glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, WIDTH, HEIGHT, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
     glBindTexture(GL_TEXTURE_2D, 0);
     
    // Create a texure for depth component.
     
    glGenTextures(1, depthTexture);
    // Bind the texture
    glBindTexture(GL_TEXTURE_2D, *depthTexture);
    // Set texture parameters
    glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
    glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
    glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
    glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
    glTexParameteri(GL_TEXTURE_2D, GL_DEPTH_TEXTURE_MODE, GL_INTENSITY);
    // Create the texture in the GPU
    glTexImage2D( GL_TEXTURE_2D, 0, GL_DEPTH_COMPONENT, WIDTH, HEIGHT, 0, GL_DEPTH_COMPONENT, GL_FLOAT, 0);
    // Unbind the texture
    glBindTexture(GL_TEXTURE_2D, 0);
     
    // Create the one channel depth texture for use by cuda
     
    glGenTextures(1, depthTexture_RGBA);
    glBindTexture(GL_TEXTURE_2D, *depthTexture_RGBA);
     
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
    glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP);
    glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP);
    glTexParameteri(GL_TEXTURE_2D, GL_DEPTH_TEXTURE_MODE, GL_INTENSITY);
     
    glTexImage2D( GL_TEXTURE_2D, 0, GL_R32F, WIDTH, HEIGHT, 0, GL_RED, GL_FLOAT, 0);
    glBindTexture(GL_TEXTURE_2D, 0);
     
     
    //Create framebuffer
     
    glGenFramebuffers(1, g_GLFramebuffer);
    glBindFramebuffer(GL_FRAMEBUFFER, *g_GLFramebuffer);
     
     // attach images
    glBindTexture(GL_TEXTURE_2D, tex_screen);
    glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,  tex_screen, 0);
     
     // attach the texture to FBO depth attachment point
    glBindTexture(GL_TEXTURE_2D, dep_screen);
    glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT1,  depthTexture_RGBA, 0);
     
    // Check to see if the frame buffer is valid
    GLenum fboStatus = glCheckFramebufferStatus( GL_FRAMEBUFFER );
    if ( fboStatus != GL_FRAMEBUFFER_COMPLETE )
    {
        std::cerr << "ERROR: Incomplete framebuffer status." << std::endl;
    }
    // Unbind the frame buffer
    glBindFramebuffer( GL_FRAMEBUFFER, 0 );
     
    // register this texture with CUDA   
     
    if(cudaGraphicsGLRegisterImage(&cuda_tex_screen_resource, tex_screen, GL_TEXTURE_2D, cudaGraphicsMapFlagsReadOnly) != cudaSuccess)
           fprintf(stderr, "Error in registering tex_screen with CUDA\n");
     
        if(cudaGraphicsGLRegisterImage(&cuda_dep_screen_resource, depthTexture_RGBA, GL_TEXTURE_2D, cudaGraphicsMapFlagsReadOnly) != cudaSuccess)
            fprintf(stderr, "Error in registering dep_screen with CUDA\n");
    ...
     
    texture<uchar4, cudaTextureType2D, cudaReadModeElementType> texRef;
    texture<float, cudaTextureType2D, cudaReadModeElementType> depRef;
    ...
    cudaGraphicsMapResources(1, &srcTex);
    cudaGraphicsMapResources(1, &srcDepth);
     
    cudaGraphicsSubResourceGetMappedArray( &srcTexArray, srcTex, 0, 0);
    cudaGraphicsSubResourceGetMappedArray( &srcDepthArray, srcDepth, 0, 0);
     
     
    cudaBindTextureToArray( texRef, srcTexArray );   
    cudaBindTextureToArray( depRef, srcDepthArray);
    ...
    uchar4 color = tex2D(texRef, x, y);
    float4 depth = tex2D(depRef, x, y);

  2. #2
    Senior Member OpenGL Guru Dark Photon's Avatar
    Join Date
    Oct 2004
    Location
    Druidia
    Posts
    4,394
    Quote Originally Posted by hoang1127 View Post
    CUDA interop OpenGL work for RGBA texture but does not work for depth value. As far as I know we need to map the depth component to a texture buffer then use CUDA interop for depth as a color texture. Below is how I did. Howver I can get only RGB but the depth always show 0. Can someone show me what is wrong or how can I use CUDA interop for depth bu...
    Hit the same issue years ago when I wanted to do a depth buffer crunch in OpenCL. This thread might have some info you can use.

  3. #3
    Junior Member Newbie
    Join Date
    Dec 2014
    Posts
    6
    Thank you, Dark Photon, is the solution to map depth value to color texure is " NV_copy_depth_to_color" parameter?
    Hoang1127

  4. #4
    Senior Member OpenGL Guru Dark Photon's Avatar
    Join Date
    Oct 2004
    Location
    Druidia
    Posts
    4,394
    Looks like that might work, if your depth buffer isn't floating point or multisample, and if you limit yourself to NVidia. That's a really, really old extension, so I'd time it to make sure it performs well for you.

    You could also do ReadPixels()/TexSubImage2D() to/from a GPU PBO. That should be pretty fast, portable, and support fixed- or floating-point depth (but won't work if multisampled). If you do this, you might avoid doing a format conversion (initially at least). For instance, if you're using fixed-point 24/8 packed depth/stencil, initially use GL_UNSIGNED_INT_24_8_EXT; or if 32F depth buffer, use GL_FLOAT.

    Or you can do what I did and just write depth to a R32F color channel/texture while rendering. That fast and works too.

    Or you can have your own simple post-process pass that does depth-to-color conversion (frag shader: read depth, write color).

    Lots of options here.
    Last edited by Dark Photon; 12-27-2014 at 05:54 PM.

  5. #5
    Junior Member Newbie
    Join Date
    Dec 2014
    Posts
    6
    Happy New Year Dark Photon, I have spent the whole week to write the code and test for depth with R32F. But it still does not show the right depth image. Here below are the code which I have edited from a CUDA interop with texture color. Where can be the error?
    The main.cpp:
    Code :
    #include <stdio.h>
    #include <stdlib.h>
    #include <cstdlib>
    #include <iostream>
    #include <string>
    #include <math.h>
    #define GLEW_STATIC // Specify GLEW_STATIC to use the static linked library (.lib) instead of the dynamic linked library (.dll) for GLEW
    #include <GL/glew.h>
    #include <glut.h>
     
    // CUDA headers
    #include <cuda_runtime_api.h>
    #include <cuda_gl_interop.h>
     
    #include "Postprocess.cu"
     
    #define SRC_BUFFER  0
    #define DST_BUFFER  1
    #define SRC_DEPTH   2
     
     
    int g_iGLUTWindowHandle = 0;
    int g_iWindowPositionX = 0;
    int g_iWindowPositionY = 0;
    int g_iWindowWidth = 512;
    int g_iWindowHeight = 512;
     
    int g_iImageWidth = g_iWindowWidth;
    int g_iImageHeight = g_iWindowHeight;
     
    float g_fRotate[3] = { 0.0f, 0.0f, 0.0f };  // Rotation parameter for scene object.
    bool g_bAnimate = true;                     // Animate the scene object
    bool g_bPostProcess = true;                 // Enable/disable the postprocess effect.
    float g_fBlurRadius = 2.0f;                 // Radius of 2D convolution blur performed in postprocess step.
     
    GLuint g_GLFramebuffer = 0;                  // Frame buffer object for off-screen rendering.
    GLuint g_GLColorAttachment0 = 0;            // Color texture to attach to frame buffer object.
    GLuint g_GLDepthAttachment = 0;             // Depth buffer to attach to frame buffer object.
    GLuint depthTexture	= 0;
    GLuint g_GLPostprocessTexture = 0;          // This is where the result of the post-process effect will go.
                                                // This is also the final texture that will be blit to the back buffer for viewing.
     
     
    // The CUDA Graphics Resource is used to map the OpenGL texture to a CUDA
    // buffer that can be used in a CUDA kernel.
    // We need 2 resource: One will be used to map to the color attachment of the
    //   framebuffer and used read-only from the CUDA kernel (SRC_BUFFER), 
    //   the second is used to write the postprocess effect to (DST_BUFFER).
    cudaGraphicsResource_t g_CUDAGraphicsResource[3] = { 0,  0, 0};   
     
    // Initialize OpenGL/GLUT
    bool InitGL( int argc, char* argv[] );
    // Initialize CUDA for OpenGL
    void InitCUDA();
    // Render a texture object to the current framebuffer
    void DisplayImage( GLuint texture, unsigned int x, unsigned int y, unsigned int width, unsigned int height );
     
     
    // Create a framebuffer object that is used for offscreen rendering.
    void CreateFramebuffer( GLuint& framebuffer, GLuint colorAttachment0, GLuint depthAttachment , GLuint depthTexture );
    void DeleteFramebuffer( GLuint& framebuffer );
     
    void CreatePBO( GLuint& bufferID, size_t size );
    void DeletePBO( GLuint& bufferID );
     
    void CreateTexture( GLuint& texture, unsigned int width, unsigned int height );
    void DeleteTexture( GLuint& texture );
     
    void CreateDepthBuffer( GLuint& depthBuffer, unsigned int width, unsigned int height );
    void DeleteDepthBuffer( GLuint& depthBuffer );
     
    // Links a OpenGL texture object to a CUDA resource that can be used in the CUDA kernel.
    void CreateCUDAResource( cudaGraphicsResource_t& cudaResource, GLuint GLtexture, cudaGraphicsMapFlags mapFlags );
    void DeleteCUDAResource( cudaGraphicsResource_t& cudaResource );
     
    void IdleGL();
    void DisplayGL();
    void KeyboardGL( unsigned char key, int x, int y );
    void ReshapeGL( int w, int h );
     
    void Cleanup( int errorCode, bool bExit = true )
    {
        if ( g_iGLUTWindowHandle != 0 )
        {
            glutDestroyWindow( g_iGLUTWindowHandle );
            g_iGLUTWindowHandle = 0;
        }
        if ( bExit )
        {
            exit( errorCode );
        }
    }
     
    // Create a pixel buffer object
    void CreatePBO( GLuint& bufferID, size_t size )
    {
        // Make sure the buffer doesn't already exist
        DeletePBO( bufferID );
     
        glGenBuffers( 1, &bufferID );
        glBindBuffer( GL_PIXEL_UNPACK_BUFFER, bufferID );
        glBufferData( GL_PIXEL_UNPACK_BUFFER, size, NULL, GL_STREAM_DRAW );
     
        glBindBuffer( GL_PIXEL_UNPACK_BUFFER, 0 );
    }
     
    void DeletePBO(  GLuint& bufferID )
    {
        if ( bufferID != 0 )
        {
            glDeleteBuffers( 1, &bufferID );
            bufferID = 0;
        }
    }
     
    // Create a texture resource for rendering to.
    void CreateTexture( GLuint& texture, unsigned int width, unsigned int height )
    {
        // Make sure we don't already have a texture defined here
        DeleteTexture( texture );
     
        glGenTextures( 1, &texture );
        glBindTexture( GL_TEXTURE_2D, texture );
     
        // set basic parameters
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
     
        // Create texture data (4-component unsigned byte)
        glTexImage2D( GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL );
     
        // Unbind the texture
        glBindTexture( GL_TEXTURE_2D, 0 );
    }
     
    void DeleteTexture( GLuint& texture )
    {
        if ( texture != 0 )
        {
            glDeleteTextures(1, &texture );
            texture = 0;
        }
    }
     
    void CreateDepthTexture(GLuint& txDepth, unsigned int width, unsigned int height)
    {
    	DeleteTexture(txDepth);
     
    	glGenTextures(1, &txDepth);
    	glBindTexture(GL_TEXTURE_2D, txDepth);
     
    	glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
    	glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);	
     
    	glTexImage2D(GL_TEXTURE_2D, 0, GL_R32F, width, height, 0, GL_DEPTH_COMPONENT, GL_FLOAT, NULL);
     
    	glBindTexture(GL_TEXTURE_2D, 0);
    }
     
     
    void CreateDepthBuffer( GLuint& depthBuffer, unsigned int width, unsigned int height )
    {
        // Delete the existing depth buffer if there is one.
        DeleteDepthBuffer( depthBuffer );
     
        glGenRenderbuffers( 1, &depthBuffer );
        glBindRenderbuffer( GL_RENDERBUFFER, depthBuffer );
     
        glRenderbufferStorage( GL_RENDERBUFFER, GL_DEPTH_COMPONENT, width, height );
     
        // Unbind the depth buffer
        glBindRenderbuffer( GL_RENDERBUFFER, 0 );
     
    }
     
    void DeleteDepthBuffer( GLuint& depthBuffer )
    {
        if ( depthBuffer != 0 )
        {
            glDeleteRenderbuffers( 1, &depthBuffer );
            depthBuffer = 0;
        }
    }
     
    void CreateFramebuffer( GLuint& framebuffer, GLuint colorAttachment0, GLuint depthAttachment , GLuint depthTexture )
    {
        // Delete the existing framebuffer if it exists.
        DeleteFramebuffer( framebuffer );
     
        glGenFramebuffers( 1, &framebuffer );
        glBindFramebuffer( GL_FRAMEBUFFER, framebuffer );
     
        glFramebufferTexture2D( GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, colorAttachment0, 0 );
        glFramebufferTexture2D( GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT,  GL_TEXTURE_2D, depthTexture, 0);
       glFramebufferRenderbuffer( GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, depthAttachment );
     
        // Check to see if the frame buffer is valid
        GLenum fboStatus = glCheckFramebufferStatus( GL_FRAMEBUFFER );
        if ( fboStatus != GL_FRAMEBUFFER_COMPLETE )
        {
            std::cerr << "ERROR: Incomplete framebuffer status." << std::endl;
        }
     
        // Unbind the frame buffer
        glBindFramebuffer( GL_FRAMEBUFFER, 0 );
    }
     
    void DeleteFramebuffer( GLuint& framebuffer )
    {
        if ( framebuffer != 0 )
        {
            glDeleteFramebuffers( 1, &framebuffer );
            framebuffer = 0;
        }
    }
     
    void CreateCUDAResource( cudaGraphicsResource_t& cudaResource, GLuint GLtexture, cudaGraphicsMapFlags mapFlags )
    {
        // Map the GL texture resource with the CUDA resource
        cudaGraphicsGLRegisterImage( &cudaResource, GLtexture, GL_TEXTURE_2D, mapFlags );
    }
     
    void DeleteCUDAResource( cudaGraphicsResource_t& cudaResource )
    {
        if ( cudaResource != 0 )
        {
            cudaGraphicsUnregisterResource( cudaResource );
            cudaResource = 0;
        }
    }
     
    int main( int argc, char* argv[] )
    {
        glutInit(&argc, argv);
     
        // Init GLUT
        if ( !InitGL( argc, argv ) )
        {
            std::cerr << "ERROR: Failed to initialize OpenGL" << std::endl;
        }
     
        InitCUDA();
     
        // Startup our GL render loop
        glutMainLoop();
     
    }
     
     
    bool InitGL( int argc, char* argv[] )
    {
        // Material property constants.
        const GLfloat fRed[] = { 1.0f, 0.1f, 0.1f, 1.0f };
        const GLfloat fWhite[] = { 1.0f, 1.0f, 1.0f, 1.0f };
     
        int iScreenWidth = glutGet(GLUT_SCREEN_WIDTH);
        int iScreenHeight = glutGet(GLUT_SCREEN_HEIGHT);
     
    //    glutInit( &argc, argv );
        glutInitDisplayMode( GLUT_RGBA | GLUT_ALPHA | GLUT_DOUBLE | GLUT_DEPTH );
        glutInitWindowPosition( iScreenWidth / 2 - g_iWindowWidth / 2,
            iScreenHeight / 2 - g_iWindowHeight / 2 );
        glutInitWindowSize( g_iWindowWidth, g_iWindowHeight );
     
        g_iGLUTWindowHandle = glutCreateWindow( "Postprocess GL" );
     
        // Register GLUT callbacks
        glutDisplayFunc(DisplayGL);
        glutKeyboardFunc(KeyboardGL);
        glutReshapeFunc(ReshapeGL);
        glutIdleFunc(IdleGL);
     
        // Init GLEW
        glewInit();
        GLboolean gGLEW = glewIsSupported(
            "GL_VERSION_3_1 " 
            "GL_ARB_pixel_buffer_object "
            "GL_ARB_framebuffer_object "
            "GL_ARB_copy_buffer " 
            );
     
        int maxAttachemnts = 0;
        glGetIntegerv( GL_MAX_COLOR_ATTACHMENTS, &maxAttachemnts );
     
        if ( !gGLEW ) return false;
     
        glClearColor( 1.0f, 1.0f, 1.0f, 1.0f );
        glDisable( GL_DEPTH_TEST );
     
        // Setup the viewport
        glViewport( 0, 0, g_iWindowWidth, g_iWindowHeight );
     
        // Setup the projection matrix
        glMatrixMode( GL_PROJECTION );
        glLoadIdentity();
     
        gluPerspective( 60.0, (GLdouble)g_iWindowWidth/(GLdouble)g_iWindowHeight, 0.1, 1.0 );
        glPolygonMode( GL_FRONT_AND_BACK, GL_FILL );
     
        // Enable one light.
        glEnable( GL_LIGHT0 );
        glMaterialfv( GL_FRONT_AND_BACK, GL_DIFFUSE, fRed );
        glMaterialfv( GL_FRONT_AND_BACK, GL_SPECULAR, fWhite );
        glMaterialf( GL_FRONT_AND_BACK, GL_SHININESS, 60.0f );
     
        return true;
    }
     
    void InitCUDA()
    {
        // We have to call cudaGLSetGLDevice if we want to use OpenGL interoperability.
        cudaGLSetGLDevice(0);
    }
     
     
    void IdleGL()
    {
        if (g_bAnimate) 
        {
            g_fRotate[0] += 0.2; while(g_fRotate[0] > 360.0f) g_fRotate[0] -= 360.0f;   // Increment and clamp
            g_fRotate[1] += 0.6; while(g_fRotate[1] > 360.0f) g_fRotate[1] -= 360.0f;
            g_fRotate[2] += 1.0; while(g_fRotate[2] > 360.0f) g_fRotate[2] -= 360.0f;
        }
     
        glutPostRedisplay();
     
    }
     
    // Render the initial scene
    void RenderScene()
    {
        glClear( GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT );
     
        glMatrixMode( GL_PROJECTION );
        glLoadIdentity();
        gluPerspective( 60.0, (GLdouble)g_iWindowWidth / (GLdouble)g_iWindowHeight, 0.1, 10.0 );
     
        glMatrixMode( GL_MODELVIEW );
        glLoadIdentity();
        glTranslatef( 0.0f, 0.0f, -3.0f );
     
        glRotatef( g_fRotate[0], 1.0f, 0.0f, 0.0f );
        glRotatef( g_fRotate[1], 0.0f, 1.0f, 0.0f );
        glRotatef( g_fRotate[2], 0.0f, 0.0f, 1.0f );
     
        glViewport( 0, 0, g_iWindowWidth, g_iWindowHeight );
     
        glEnable( GL_LIGHTING );
        glEnable( GL_DEPTH_TEST );
        glDepthFunc( GL_LESS );
     
        glutSolidTeapot( 1.0 );
     
    }
     
    // Perform a post-process effect on the current framebuffer (back buffer)
    void Postprocess()
    {
        if ( g_bPostProcess )
        {
     
            PostprocessCUDA( g_CUDAGraphicsResource[DST_BUFFER], g_CUDAGraphicsResource[SRC_BUFFER], g_CUDAGraphicsResource[SRC_DEPTH] , g_iImageWidth, g_iImageHeight );
        }
        else
        {
            // No postprocess effect. Just copy the contents of the color buffer
            // from the framebuffer (where the scene was rendered) to the 
            // post-process texture.  The postprocess texture will be rendered to the screen
            // in the next step.
            glBindFramebuffer( GL_FRAMEBUFFER, g_GLFramebuffer );
            glBindTexture( GL_TEXTURE_2D, g_GLPostprocessTexture );
     
            glCopyTexImage2D( GL_TEXTURE_2D, 0, GL_RGBA, 0, 0, g_iImageWidth, g_iImageHeight, 0 );
     
            glBindTexture( GL_TEXTURE_2D, 0 );
            glBindFramebuffer( GL_FRAMEBUFFER, 0 );
        }
    }
     
    void DisplayGL()
    {
        // Bind the framebuffer that we want to use as the render target.
        glBindFramebuffer( GL_FRAMEBUFFER, g_GLFramebuffer );
        RenderScene();
        // Unbind the framebuffer so we render to the back buffer again.
        glBindFramebuffer( GL_FRAMEBUFFER, 0 );
     
        Postprocess();
     
        // Blit the image full-screen
        DisplayImage( g_GLPostprocessTexture, 0, 0, g_iWindowWidth, g_iWindowHeight );
     
        glutSwapBuffers();
        glutPostRedisplay();
    }
     
    void DisplayImage( GLuint texture, unsigned int x, unsigned int y, unsigned int width, unsigned int height )
    {
        glBindTexture(GL_TEXTURE_2D, texture);
        glEnable(GL_TEXTURE_2D);
        glDisable(GL_DEPTH_TEST);
        glDisable(GL_LIGHTING);
        glTexEnvf(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
     
        glMatrixMode(GL_PROJECTION);
        glPushMatrix();
        glLoadIdentity();
        glOrtho(-1.0, 1.0, -1.0, 1.0, -1.0, 1.0);
     
        glMatrixMode( GL_MODELVIEW);
        glLoadIdentity();
     
        glPushAttrib( GL_VIEWPORT_BIT );
        glViewport(x, y, width, height );
     
        glBegin(GL_QUADS);
        glTexCoord2f(0.0, 0.0); glVertex3f(-1.0, -1.0, 0.5);
        glTexCoord2f(1.0, 0.0); glVertex3f(1.0, -1.0, 0.5);
        glTexCoord2f(1.0, 1.0); glVertex3f(1.0, 1.0, 0.5);
        glTexCoord2f(0.0, 1.0); glVertex3f(-1.0, 1.0, 0.5);
        glEnd();
     
        glPopAttrib();
     
        glMatrixMode(GL_PROJECTION);
        glPopMatrix();
     
        glDisable(GL_TEXTURE_2D);
    }
     
    void KeyboardGL( unsigned char key, int x, int y )
    {
        switch( key )
        {
        case '\033': // escape quits
        case 'Q':    // Q quits
        case 'q':    // q quits
            {
                // Cleanup up and quit
                Cleanup(0);
            }
            break;
        }
     
        glutPostRedisplay();
    }
     
    void ReshapeGL( int w, int h )
    {
        h = std::max(h, 1);
     
        g_iWindowWidth = w;
        g_iWindowHeight = h;
     
        g_iImageWidth = w;
        g_iImageHeight = h;
     
        // Create a surface texture to render the scene to.
        CreateTexture( g_GLColorAttachment0, g_iImageWidth, g_iImageHeight );
     
        // Create a depth buffer for the frame buffer object.
        CreateDepthBuffer( g_GLDepthAttachment, g_iImageWidth, g_iImageHeight );
     
        CreateDepthTexture( depthTexture, g_iImageWidth, g_iImageHeight);
     
        // Attach the color and depth textures to the framebuffer.
        CreateFramebuffer( g_GLFramebuffer, g_GLColorAttachment0, g_GLDepthAttachment , depthTexture);
     
        // Create a texture to render the post-process effect to.
        CreateTexture( g_GLPostprocessTexture, g_iImageWidth, g_iImageHeight );
     
        // Map the color attachment to a CUDA graphics resource so we can read it in a CUDA a kernel.
        CreateCUDAResource( g_CUDAGraphicsResource[SRC_BUFFER], g_GLColorAttachment0, cudaGraphicsMapFlagsReadOnly );
        CreateCUDAResource( g_CUDAGraphicsResource[SRC_DEPTH], depthTexture, cudaGraphicsMapFlagsReadOnly );
        // Map the post-process texture to the CUDA resource so it can be 
        // written in the kernel.
        CreateCUDAResource( g_CUDAGraphicsResource[DST_BUFFER], g_GLPostprocessTexture, cudaGraphicsMapFlagsWriteDiscard );
     
        glutPostRedisplay();
    }

    And here is postProcess.cu:
    Code :
    #include <cuda_runtime_api.h>
    #include "Postprocess.h"
     
    #define BLOCK_SIZE 16     // block size
     
    texture<uchar4, cudaTextureType2D, cudaReadModeElementType> texRef;
    texture<float, cudaTextureType2D, cudaReadModeElementType> depRef;
     
    __global__ void PostprocessKernel( uchar4* dst, unsigned int imgWidth, unsigned int imgHeight )
    {
        unsigned int tx = threadIdx.x;
        unsigned int ty = threadIdx.y;
        unsigned int bw = blockDim.x;
        unsigned int bh = blockDim.y;
        // Non-normalized U, V coordinates of input texture for current thread.
        unsigned int u = ( bw * blockIdx.x ) + tx;
        unsigned int v = ( bh * blockIdx.y ) + ty;
     
        // Early-out if we are beyond the texture coordinates for our texture.
        if ( u > imgWidth || v > imgHeight ) return;
     
        unsigned int index = ( v * imgWidth ) + u;
     //   uchar4 color = tex2D( texRef, u, v );
        float depth = tex2D(depRef, u, v);
     //   dst[index] = make_uchar4( color.x, color.y, color.z, 1);
     
        dst[index] = make_uchar4( depth*255, depth*255, depth*255, 1);
       }
     
    uchar4* g_dstBuffer = NULL;
    size_t g_BufferSize = 0; 
     
    void PostprocessCUDA( cudaGraphicsResource_t& dst, cudaGraphicsResource_t& src, cudaGraphicsResource_t& srcDepth,  unsigned int width, unsigned int height)
    {
     
        cudaGraphicsResource_t resources[2] = { srcDepth, dst };
     
        // Map the resources so they can be used in the kernel.
       cudaGraphicsMapResources( 2, resources ) ;
     
     
     //   cudaArray* srcArray;
        cudaArray* dstArray;   
        cudaArray* srcDepthArray;   
     
        // Get a device pointer to the OpenGL buffers
     //  cudaGraphicsSubResourceGetMappedArray( &srcArray, src, 0, 0 ) ;
        cudaGraphicsSubResourceGetMappedArray( &srcDepthArray, srcDepth, 0, 0 ) ;
        cudaGraphicsSubResourceGetMappedArray( &dstArray, dst, 0, 0 ) ;
     
        // Map the source texture to a texture reference.
      //   cudaBindTextureToArray( texRef, srcArray );
         cudaBindTextureToArray( depRef, srcDepthArray );
     
         // Destination buffer to store the result of the postprocess effect.
        size_t bufferSize = width * height * sizeof(uchar4);
        if ( g_BufferSize != bufferSize )
        {
            if ( g_dstBuffer != NULL )
            {
                cudaFree( g_dstBuffer );
            }
            // Only re-allocate the global memory buffer if the screen size changes, 
            // or it has never been allocated before (g_BufferSize is still 0)
            g_BufferSize = bufferSize;
            cudaMalloc( &g_dstBuffer, g_BufferSize );
        }
     
        // Compute the grid size
        size_t blocksW = (size_t)ceilf( width / (float)BLOCK_SIZE );
        size_t blocksH = (size_t)ceilf( height / (float)BLOCK_SIZE );
        dim3 gridDim( blocksW, blocksH, 1 );
        dim3 blockDim( BLOCK_SIZE, BLOCK_SIZE, 1 );
     
        PostprocessKernel<<< gridDim, blockDim >>>( g_dstBuffer, width, height );
     
        // Copy the destination back to the source array
        cudaMemcpyToArray( dstArray, 0, 0, g_dstBuffer, bufferSize, cudaMemcpyDeviceToDevice  );
     
        // Unbind the texture reference
     //   cudaUnbindTexture( texRef);
        cudaUnbindTexture( depRef);
     
        // Unmap the resources again so the texture can be rendered in OpenGL
         cudaGraphicsUnmapResources( 2, resources ) ;
    }

    And here is CMakeLists.txt
    Code :
    cmake_minimum_required (VERSION 2.8)
    project (PostprocessLinux)
     
    find_package(CUDA)
     
    include_directories(${PROJECT_SOURCE_DIR}/include)
    link_directories(${PROJECT_SOURCE_DIR}/lib)
     
    cuda_include_directories("/usr/local/cuda-6.5/include")
    link_directories("/usr/local/cuda-6.5/lib64")
     
    cuda_compile(Postprocess_O, Postprocess.cu)
    cuda_compile(Main_O main.cu)
     
    cuda_add_executable (test ${Postprocess_O} ${Main_O} )
     
    target_link_libraries(test "-lglut" "-lGL" "-lGLU" "-lGLEW" "-lpthread" "-lcudart")
     
    INSTALL(TARGETS test RUNTIME DESTINATION bin LIBRARY DESTINATION lib)
    To build it, you need make a build directory and cmake .. and then make

  6. #6
    Senior Member OpenGL Guru Dark Photon's Avatar
    Join Date
    Oct 2004
    Location
    Druidia
    Posts
    4,394
    Quote Originally Posted by hoang1127
    Code :
        glTexImage2D(GL_TEXTURE_2D, 0, GL_R32F, width, height, 0, GL_DEPTH_COMPONENT, GL_FLOAT, NULL);
        ...
        glFramebufferTexture2D( GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, colorAttachment0, 0 );
        glFramebufferTexture2D( GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT,  GL_TEXTURE_2D, depthTexture, 0);
        glFramebufferRenderbuffer( GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, depthAttachment );
    You might check for GL errors. This doesn't look right.

    First, R32F is a color format. You're creating a R32F color texture, but then requesting to populate it with a DEPTH_COMPONENT format. I don't think that's valid. Since you provided NULL for the pointer it's probably academic though.

    Then also you've got two bindings for the DEPTH_ATTACHMENT for the FBO. You need one.

    Finally, AFAIK you can't use an R32F color texture as a depth attachment. You need a DEPTH_COMPONENT renderbuffer or texture here.

    You either need to rasterize out a 2nd color buffer with the depth value (this is your R32F texture), or after rendering to your depthAttachment, you need to generate the R32F texture/renderbuffer from that via one of the methods mentioned.

  7. #7
    Junior Member Newbie
    Join Date
    Dec 2014
    Posts
    6
    Thanks you for the suggestion. They are corrected all now. But still shows black screen when draw depth texture. When drawing with color texture (dst[index] = make_uchar4( color.x, color.y, color.z, 1) then It shows correctly. Now the R32F is the second color texture. And It is mapped to read in kernel. Where can be wrong. Here is the code.

    main.cpp
    Code :
    #include <stdio.h>
    #include <stdlib.h>
    #include <cstdlib>
    #include <iostream>
    #include <string>
    #include <math.h>
    #define GLEW_STATIC // Specify GLEW_STATIC to use the static linked library (.lib) instead of the dynamic linked library (.dll) for GLEW
    #include <GL/glew.h>
    #include <glut.h>
     
    // CUDA headers
    #include <cuda_runtime_api.h>
    #include <cuda_gl_interop.h>
     
    #include "Postprocess.cu"
     
    #define SRC_BUFFER  0
    #define DST_BUFFER  1
    #define SRC_DEPTH   2
     
     
    int g_iGLUTWindowHandle = 0;
    int g_iWindowPositionX = 0;
    int g_iWindowPositionY = 0;
    int g_iWindowWidth = 512;
    int g_iWindowHeight = 512;
     
    int g_iImageWidth = g_iWindowWidth;
    int g_iImageHeight = g_iWindowHeight;
     
    float g_fRotate[3] = { 0.0f, 0.0f, 0.0f };  // Rotation parameter for scene object.
     
    float g_fBlurRadius = 2.0f;                 // Radius of 2D convolution blur performed in postprocess step.
     
    GLuint g_GLFramebuffer = 0;                  // Frame buffer object for off-screen rendering.
    GLuint g_GLColorAttachment0 = 0;            // Color texture to attach to frame buffer object.
    GLuint g_GLDepthAttachment = 0;             // Depth buffer to attach to frame buffer object.
    GLuint depthTexture	= 0;
    GLuint g_GLPostprocessTexture = 0;          // This is where the result of the post-process effect will go.
                                                // This is also the final texture that will be blit to the back buffer for viewing.
     
     
    // The CUDA Graphics Resource is used to map the OpenGL texture to a CUDA
    // buffer that can be used in a CUDA kernel.
    // We need 2 resource: One will be used to map to the color attachment of the
    //   framebuffer and used read-only from the CUDA kernel (SRC_BUFFER), 
    //   the second is used to write the postprocess effect to (DST_BUFFER).
    cudaGraphicsResource_t g_CUDAGraphicsResource[3] = { 0,  0, 0};   
     
    // Initialize OpenGL/GLUT
    bool InitGL( int argc, char* argv[] );
    // Initialize CUDA for OpenGL
    void InitCUDA();
    // Render a texture object to the current framebuffer
    void DisplayImage( GLuint texture, unsigned int x, unsigned int y, unsigned int width, unsigned int height );
     
     
    // Create a framebuffer object that is used for offscreen rendering.
    void CreateFramebuffer( GLuint& framebuffer, GLuint colorAttachment0, GLuint depthAttachment , GLuint depthTexture );
    void DeleteFramebuffer( GLuint& framebuffer );
     
    void CreatePBO( GLuint& bufferID, size_t size );
    void DeletePBO( GLuint& bufferID );
     
    void CreateTexture( GLuint& texture, unsigned int width, unsigned int height );
    void DeleteTexture( GLuint& texture );
     
    void CreateDepthBuffer( GLuint& depthBuffer, unsigned int width, unsigned int height );
    void DeleteDepthBuffer( GLuint& depthBuffer );
     
    // Links a OpenGL texture object to a CUDA resource that can be used in the CUDA kernel.
    void CreateCUDAResource( cudaGraphicsResource_t& cudaResource, GLuint GLtexture, cudaGraphicsMapFlags mapFlags );
    void DeleteCUDAResource( cudaGraphicsResource_t& cudaResource );
     
    void IdleGL();
    void DisplayGL();
    void KeyboardGL( unsigned char key, int x, int y );
    void ReshapeGL( int w, int h );
     
    void Cleanup( int errorCode, bool bExit = true )
    {
        if ( g_iGLUTWindowHandle != 0 )
        {
            glutDestroyWindow( g_iGLUTWindowHandle );
            g_iGLUTWindowHandle = 0;
        }
        if ( bExit )
        {
            exit( errorCode );
        }
    }
     
    // Create a pixel buffer object
    void CreatePBO( GLuint& bufferID, size_t size )
    {
        // Make sure the buffer doesn't already exist
        DeletePBO( bufferID );
     
        glGenBuffers( 1, &bufferID );
        glBindBuffer( GL_PIXEL_UNPACK_BUFFER, bufferID );
        glBufferData( GL_PIXEL_UNPACK_BUFFER, size, NULL, GL_STREAM_DRAW );
     
        glBindBuffer( GL_PIXEL_UNPACK_BUFFER, 0 );
    }
     
    void DeletePBO(  GLuint& bufferID )
    {
        if ( bufferID != 0 )
        {
            glDeleteBuffers( 1, &bufferID );
            bufferID = 0;
        }
    }
     
    // Create a texture resource for rendering to.
    void CreateTexture( GLuint& texture, unsigned int width, unsigned int height )
    {
        // Make sure we don't already have a texture defined here
        DeleteTexture( texture );
     
        glGenTextures( 1, &texture );
        glBindTexture( GL_TEXTURE_2D, texture );
     
        // set basic parameters
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
     
        // Create texture data (4-component unsigned byte)
        glTexImage2D( GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL );
     
        // Unbind the texture
        glBindTexture( GL_TEXTURE_2D, 0 );
    }
     
    void DeleteTexture( GLuint& texture )
    {
        if ( texture != 0 )
        {
            glDeleteTextures(1, &texture );
            texture = 0;
        }
    }
     
    void CreateDepthTexture(GLuint& txDepth, unsigned int width, unsigned int height)
    {
    	DeleteTexture(txDepth);
     
    	glGenTextures(1, &txDepth);
    	glBindTexture(GL_TEXTURE_2D, txDepth);
     
    	glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
    	glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);	
     
    	glTexImage2D(GL_TEXTURE_2D, 0, GL_R32F, width, height, 0, GL_DEPTH_COMPONENT, GL_FLOAT, NULL);
     
    	glBindTexture(GL_TEXTURE_2D, 0);
    }
     
     
    void CreateDepthBuffer( GLuint& depthBuffer, unsigned int width, unsigned int height )
    {
        // Delete the existing depth buffer if there is one.
        DeleteDepthBuffer( depthBuffer );
     
        glGenRenderbuffers( 1, &depthBuffer );
        glBindRenderbuffer( GL_RENDERBUFFER, depthBuffer );
     
        glRenderbufferStorage( GL_RENDERBUFFER, GL_DEPTH_COMPONENT, width, height );
     
        // Unbind the depth buffer
        glBindRenderbuffer( GL_RENDERBUFFER, 0 );
     
    }
     
    void DeleteDepthBuffer( GLuint& depthBuffer )
    {
        if ( depthBuffer != 0 )
        {
            glDeleteRenderbuffers( 1, &depthBuffer );
            depthBuffer = 0;
        }
    }
     
    void CreateFramebuffer( GLuint& framebuffer, GLuint colorAttachment0, GLuint depthAttachment , GLuint depthTexture )
    {
        // Delete the existing framebuffer if it exists.
        DeleteFramebuffer( framebuffer );
     
        glGenFramebuffers( 1, &framebuffer );
        glBindFramebuffer( GL_FRAMEBUFFER, framebuffer );
     
         glFramebufferTexture2D( GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, colorAttachment0, 0 );
        glFramebufferTexture2D( GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT1,  GL_TEXTURE_2D, depthTexture, 0);   
        glFramebufferTexture2D( GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT,  GL_TEXTURE_2D, depthAttachment, 0);
     
        // Check to see if the frame buffer is valid
        GLenum fboStatus = glCheckFramebufferStatus( GL_FRAMEBUFFER );
        if ( fboStatus != GL_FRAMEBUFFER_COMPLETE )
        {
            std::cerr << "ERROR: Incomplete framebuffer status." << std::endl;
        }
     
        // Unbind the frame buffer
        glBindFramebuffer( GL_FRAMEBUFFER, 0 );
    }
     
    void DeleteFramebuffer( GLuint& framebuffer )
    {
        if ( framebuffer != 0 )
        {
            glDeleteFramebuffers( 1, &framebuffer );
            framebuffer = 0;
        }
    }
     
    void CreateCUDAResource( cudaGraphicsResource_t& cudaResource, GLuint GLtexture, cudaGraphicsMapFlags mapFlags )
    {
        // Map the GL texture resource with the CUDA resource
        cudaGraphicsGLRegisterImage( &cudaResource, GLtexture, GL_TEXTURE_2D, mapFlags );
    }
     
    void DeleteCUDAResource( cudaGraphicsResource_t& cudaResource )
    {
        if ( cudaResource != 0 )
        {
            cudaGraphicsUnregisterResource( cudaResource );
            cudaResource = 0;
        }
    }
     
    int main( int argc, char* argv[] )
    {
        glutInit(&argc, argv);
     
        // Init GLUT
        if ( !InitGL( argc, argv ) )
        {
            std::cerr << "ERROR: Failed to initialize OpenGL" << std::endl;
        }
     
        InitCUDA();
     
        // Startup our GL render loop
        glutMainLoop();
     
    }
     
     
    bool InitGL( int argc, char* argv[] )
    {
        // Material property constants.
        const GLfloat fRed[] = { 1.0f, 0.1f, 0.1f, 1.0f };
        const GLfloat fWhite[] = { 1.0f, 1.0f, 1.0f, 1.0f };
     
        int iScreenWidth = glutGet(GLUT_SCREEN_WIDTH);
        int iScreenHeight = glutGet(GLUT_SCREEN_HEIGHT);
     
    //    glutInit( &argc, argv );
        glutInitDisplayMode( GLUT_RGBA | GLUT_ALPHA | GLUT_DOUBLE | GLUT_DEPTH );
        glutInitWindowPosition( iScreenWidth / 2 - g_iWindowWidth / 2,
            iScreenHeight / 2 - g_iWindowHeight / 2 );
        glutInitWindowSize( g_iWindowWidth, g_iWindowHeight );
     
        g_iGLUTWindowHandle = glutCreateWindow( "Postprocess GL" );
     
        // Register GLUT callbacks
        glutDisplayFunc(DisplayGL);
        glutKeyboardFunc(KeyboardGL);
        glutReshapeFunc(ReshapeGL);
        glutIdleFunc(IdleGL);
     
        // Init GLEW
        glewInit();
        GLboolean gGLEW = glewIsSupported(
            "GL_VERSION_3_1 " 
            "GL_ARB_pixel_buffer_object "
            "GL_ARB_framebuffer_object "
            "GL_ARB_copy_buffer " 
            );
     
        int maxAttachemnts = 0;
        glGetIntegerv( GL_MAX_COLOR_ATTACHMENTS, &maxAttachemnts );
     
        if ( !gGLEW ) return false;
     
        glClearColor( 1.0f, 1.0f, 1.0f, 1.0f );
     //   glDisable( GL_DEPTH_TEST );
     
        // Setup the viewport
        glViewport( 0, 0, g_iWindowWidth, g_iWindowHeight );
     
        // Setup the projection matrix
        glMatrixMode( GL_PROJECTION );
        glLoadIdentity();
     
        gluPerspective( 60.0, (GLdouble)g_iWindowWidth/(GLdouble)g_iWindowHeight, 0.1, 1.0 );
        glPolygonMode( GL_FRONT_AND_BACK, GL_FILL );
     
        // Enable one light.
        glEnable( GL_LIGHT0 );
        glMaterialfv( GL_FRONT_AND_BACK, GL_DIFFUSE, fRed );
        glMaterialfv( GL_FRONT_AND_BACK, GL_SPECULAR, fWhite );
        glMaterialf( GL_FRONT_AND_BACK, GL_SHININESS, 60.0f );
     
        return true;
    }
     
    void InitCUDA()
    {
        // We have to call cudaGLSetGLDevice if we want to use OpenGL interoperability.
        cudaGLSetGLDevice(0);
    }
     
     
    void IdleGL()
    {
     
        glutPostRedisplay();
     
    }
     
    // Render the initial scene
    void RenderScene()
    {
        glClear( GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT );
     
        glMatrixMode( GL_PROJECTION );
        glLoadIdentity();
        gluPerspective( 60.0, (GLdouble)g_iWindowWidth / (GLdouble)g_iWindowHeight, 0.1, 10.0 );
     
        glMatrixMode( GL_MODELVIEW );
        glLoadIdentity();
        glTranslatef( 0.0f, 0.0f, -3.0f );
     
        glRotatef( g_fRotate[0], 1.0f, 0.0f, 0.0f );
        glRotatef( g_fRotate[1], 0.0f, 1.0f, 0.0f );
        glRotatef( g_fRotate[2], 0.0f, 0.0f, 1.0f );
     
        glViewport( 0, 0, g_iWindowWidth, g_iWindowHeight );
     
        glEnable( GL_LIGHTING );
        glEnable( GL_DEPTH_TEST );
        glDepthFunc( GL_LESS );
     
        glutSolidTeapot( 1.0 );
     
    }
     
    // Perform a post-process effect on the current framebuffer (back buffer)
    void Postprocess()
    {
     
            PostprocessCUDA( g_CUDAGraphicsResource[DST_BUFFER], g_CUDAGraphicsResource[SRC_BUFFER], g_CUDAGraphicsResource[SRC_DEPTH] , g_iImageWidth, g_iImageHeight );
     
    }
     
    void DisplayGL()
    {
        // Bind the framebuffer that we want to use as the render target.
        glBindFramebuffer( GL_FRAMEBUFFER, g_GLFramebuffer );
        RenderScene();
        // Unbind the framebuffer so we render to the back buffer again.
        glBindFramebuffer( GL_FRAMEBUFFER, 0 );
     
     //   unsigned char * readData = (unsigned char* )glMapBuffer(GL_PIXEL_PACK_BUFFER, GL_READ_ONLY);
     
    	// Test reading
     //   glBindTexture(GL_TEXTURE_2D, depthTexture );
     
    //    glActiveTexture(GL_TEXTURE0);
      //  glGetTexImage(GL_TEXTURE_2D, 0, GL_DEPTH_COMPONENT, GL_FLOAT, readData);
     
     
     //    glBindTexture(GL_TEXTURE_2D, 0);
     
        Postprocess();
     
        // Blit the image full-screen
        DisplayImage( g_GLPostprocessTexture, 0, 0, g_iWindowWidth, g_iWindowHeight );
     
     
        glutSwapBuffers();
        glutPostRedisplay();
     
    }
     
    void DisplayImage( GLuint texture, unsigned int x, unsigned int y, unsigned int width, unsigned int height )
    {
        glBindTexture(GL_TEXTURE_2D, texture);
        glEnable(GL_TEXTURE_2D);
        glDisable(GL_DEPTH_TEST);
        glDisable(GL_LIGHTING);
        glTexEnvf(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
     
        glMatrixMode(GL_PROJECTION);
        glPushMatrix();
        glLoadIdentity();
        glOrtho(-1.0, 1.0, -1.0, 1.0, -1.0, 1.0);
     
        glMatrixMode( GL_MODELVIEW);
        glLoadIdentity();
     
        glPushAttrib( GL_VIEWPORT_BIT );
        glViewport(x, y, width, height );
     
        glBegin(GL_QUADS);
        glTexCoord2f(0.0, 0.0); glVertex3f(-1.0, -1.0, 0.5);
        glTexCoord2f(1.0, 0.0); glVertex3f(1.0, -1.0, 0.5);
        glTexCoord2f(1.0, 1.0); glVertex3f(1.0, 1.0, 0.5);
        glTexCoord2f(0.0, 1.0); glVertex3f(-1.0, 1.0, 0.5);
        glEnd();
     
        glPopAttrib();
     
        glMatrixMode(GL_PROJECTION);
        glPopMatrix();
     
        glDisable(GL_TEXTURE_2D);
    }
     
    void KeyboardGL( unsigned char key, int x, int y )
    {
        switch( key )
        {
        case '\033': // escape quits
        case 'Q':    // Q quits
        case 'q':    // q quits
            {
                // Cleanup up and quit
                Cleanup(0);
            }
            break;
        }
     
        glutPostRedisplay();
    }
     
    void ReshapeGL( int w, int h )
    {
        h = std::max(h, 1);
     
        g_iWindowWidth = w;
        g_iWindowHeight = h;
     
        g_iImageWidth = w;
        g_iImageHeight = h;
     
        // Create a surface texture to render the scene to.
        CreateTexture( g_GLColorAttachment0, g_iImageWidth, g_iImageHeight );
       CreateDepthTexture( depthTexture, g_iImageWidth, g_iImageHeight);
     
        // Create a depth buffer for the frame buffer object.
    //    CreateDepthBuffer( g_GLDepthAttachment, g_iImageWidth, g_iImageHeight );
     
     
        // Attach the color and depth textures to the framebuffer.
        CreateFramebuffer( g_GLFramebuffer, g_GLColorAttachment0, g_GLDepthAttachment , depthTexture);
     
        // Create a texture to render the post-process effect to.
        CreateTexture( g_GLPostprocessTexture, g_iImageWidth, g_iImageHeight );
     
        // Map the color attachment to a CUDA graphics resource so we can read it in a CUDA a kernel.
        CreateCUDAResource( g_CUDAGraphicsResource[SRC_BUFFER], g_GLColorAttachment0, cudaGraphicsMapFlagsReadOnly );
        CreateCUDAResource( g_CUDAGraphicsResource[SRC_DEPTH], depthTexture, cudaGraphicsMapFlagsReadOnly );
        // Map the post-process texture to the CUDA resource so it can be 
        // written in the kernel.
        CreateCUDAResource( g_CUDAGraphicsResource[DST_BUFFER], g_GLPostprocessTexture, cudaGraphicsMapFlagsWriteDiscard );
     
        glutPostRedisplay();
    }

    Here is the Postprocess.cu:

    Code :
    #include <cuda_runtime_api.h>
    #include "Postprocess.h"
     
    #define BLOCK_SIZE 16     // block size
     
    texture<uchar4, cudaTextureType2D, cudaReadModeElementType> texRef;
    texture<float, cudaTextureType2D, cudaReadModeElementType> depRef;
     
    __global__ void PostprocessKernel( uchar4* dst, unsigned int imgWidth, unsigned int imgHeight )
    {
        unsigned int tx = threadIdx.x;
        unsigned int ty = threadIdx.y;
        unsigned int bw = blockDim.x;
        unsigned int bh = blockDim.y;
        // Non-normalized U, V coordinates of input texture for current thread.
        unsigned int u = ( bw * blockIdx.x ) + tx;
        unsigned int v = ( bh * blockIdx.y ) + ty;
     
        // Early-out if we are beyond the texture coordinates for our texture.
        if ( u > imgWidth || v > imgHeight ) return;
     
        unsigned int index = ( v * imgWidth ) + u;
        uchar4 color = tex2D( texRef, u, v );
        float depth = tex2D(depRef, u, v);
     
     //   dst[index] = make_uchar4( color.x, color.y, color.z, 1);
     
       dst[index] = make_uchar4( depth*255, depth*255, depth*255, 1);
       }
     
    uchar4* g_dstBuffer = NULL;
    size_t g_BufferSize = 0; 
     
    void PostprocessCUDA( cudaGraphicsResource_t& dst, cudaGraphicsResource_t& src, cudaGraphicsResource_t& srcDepth,  unsigned int width, unsigned int height)
    {
     
      //  cudaGraphicsResource_t resources[3] = { src, srcDepth, dst };
     
        // Map the resources so they can be used in the kernel.
       cudaGraphicsMapResources( 1, &src ) ;
       cudaGraphicsMapResources( 1, &srcDepth ) ;
       cudaGraphicsMapResources(1, &dst ) ;
     
     
       cudaArray* srcArray;
       cudaArray* dstArray;   
       cudaArray* srcDepthArray;   
     
        // Get a device pointer to the OpenGL buffers
       cudaGraphicsSubResourceGetMappedArray( &srcArray, src, 0, 0 ) ;
       cudaGraphicsSubResourceGetMappedArray( &srcDepthArray, srcDepth, 0, 0 ) ;
       cudaGraphicsSubResourceGetMappedArray( &dstArray, dst, 0, 0 ) ;
     
        // Map the source texture to a texture reference.
         cudaBindTextureToArray( texRef, srcArray );
         cudaBindTextureToArray( depRef, srcDepthArray );
     
         // Destination buffer to store the result of the postprocess effect.
        size_t bufferSize = width * height * sizeof(uchar4);
        if ( g_BufferSize != bufferSize )
        {
            if ( g_dstBuffer != NULL )
            {
                cudaFree( g_dstBuffer );
            }
            // Only re-allocate the global memory buffer if the screen size changes, 
            // or it has never been allocated before (g_BufferSize is still 0)
            g_BufferSize = bufferSize;
            cudaMalloc( &g_dstBuffer, g_BufferSize );
        }
     
        // Compute the grid size
        size_t blocksW = (size_t)ceilf( width / (float)BLOCK_SIZE );
        size_t blocksH = (size_t)ceilf( height / (float)BLOCK_SIZE );
        dim3 gridDim( blocksW, blocksH, 1 );
        dim3 blockDim( BLOCK_SIZE, BLOCK_SIZE, 1 );
     
        PostprocessKernel<<< gridDim, blockDim >>>( g_dstBuffer, width, height );
     
        // Copy the destination back to the source array
        cudaMemcpyToArray( dstArray, 0, 0, g_dstBuffer, bufferSize, cudaMemcpyDeviceToDevice  );
     
        // Unbind the texture reference
        cudaUnbindTexture( texRef);
        cudaUnbindTexture( depRef);
     
        // Unmap the resources again so the texture can be rendered in OpenGL
         cudaGraphicsUnmapResources( 1, &src ) ;
        cudaGraphicsUnmapResources( 1, &srcDepth ) ;
        cudaGraphicsUnmapResources( 1, &dst ) ;
     
    }

    And the CMakelists.txt:

    Code :
    cmake_minimum_required (VERSION 2.8)
    project (PostprocessLinux)
     
    find_package(CUDA)
     
    include_directories(${PROJECT_SOURCE_DIR}/include)
    link_directories(${PROJECT_SOURCE_DIR}/lib)
     
    cuda_include_directories("/usr/local/cuda-6.5/include")
    link_directories("/usr/local/cuda-6.5/lib64")
     
    cuda_compile(Postprocess_O, Postprocess.cu)
    cuda_compile(Main_O main.cu)
     
    cuda_add_executable (test ${Postprocess_O} ${Main_O} )
     
    target_link_libraries(test "-lglut" "-lGL" "-lGLU" "-lGLEW" "-lpthread" "-lcudart")
     
    INSTALL(TARGETS test RUNTIME DESTINATION bin LIBRARY DESTINATION lib)
    Last edited by hoang1127; 01-02-2015 at 12:01 PM.

  8. #8
    Senior Member OpenGL Guru Dark Photon's Avatar
    Join Date
    Oct 2004
    Location
    Druidia
    Posts
    4,394
    Quote Originally Posted by hoang1127 View Post
    Thanks you for the suggestion. They are corrected all now. But still shows black screen when draw depth texture.
    Suggestion: get your R32F color "depth" texture correct and leave CUDA on the sidelines for now. Render the R32F color texture containing depth to the screen with OpenGL in a separate pass. Linearize depth values so that you can see something useful (otherwise most of the values will be clustered up around near 1.0. Assuming a perspective projection, use something like this:

    Code cpp:
    float ComputeEyeSpaceDepth(in float depth) // Assumes perspective projection used to render "depth"
    {
      return near * far / ((depth * (far - near)) - far);
    }
     
    float depth_win = texelFetch( r32f_tex, ivec2( gl_FragCoord.xy ), 0 ).r;
    float depth_eye = ComputeEyeSpaceDepth( depth_win );
    float color     = (-depth_eye-near) / (far - near);
    gl_FragData[0] = vec4( vec3( color ), 1 );

  9. #9
    Junior Member Newbie
    Join Date
    Dec 2014
    Posts
    6
    With shader, It shows the depth value in render. But in CUDA kernel it still show emply (black).

    Click image for larger version. 

Name:	Screenshot from 2015-01-04 21:17:41.jpg 
Views:	247 
Size:	18.8 KB 
ID:	1582

    Can you suggest me anything.

  10. #10
    Senior Member OpenGL Guru Dark Photon's Avatar
    Join Date
    Oct 2004
    Location
    Druidia
    Posts
    4,394
    Quote Originally Posted by hoang1127 View Post
    With shader, It shows the depth value in render. But in CUDA kernel it still show emply (black). ... Can you suggest me anything.
    Sounds like you've resolved your OpenGL issues then. You'll probably get the best response to your CUDA questions on the CUDA forums at https://devtalk.nvidia.com/.

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •