/*
 * Copyright 1993-2007 NVIDIA Corporation.  All rights reserved.
 *
 * NOTICE TO USER:   
 *
 * This source code is subject to NVIDIA ownership rights under U.S. and 
 * international Copyright laws.  Users and possessors of this source code 
 * are hereby granted a nonexclusive, royalty-free license to use this code 
 * in individual and commercial software.
 *
 * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE 
 * CODE FOR ANY PURPOSE.  IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR 
 * IMPLIED WARRANTY OF ANY KIND.  NVIDIA DISCLAIMS ALL WARRANTIES WITH 
 * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF 
 * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
 * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, 
 * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS 
 * OF USE, DATA OR PROFITS,  WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE 
 * OR OTHER TORTIOUS ACTION,  ARISING OUT OF OR IN CONNECTION WITH THE USE 
 * OR PERFORMANCE OF THIS SOURCE CODE.  
 *
 * U.S. Government End Users.   This source code is a "commercial item" as 
 * that term is defined at  48 C.F.R. 2.101 (OCT 1995), consisting  of 
 * "commercial computer  software"  and "commercial computer software 
 * documentation" as such terms are  used in 48 C.F.R. 12.212 (SEPT 1995) 
 * and is provided to the U.S. Government only as a commercial end item.  
 * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through 
 * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the 
 * source code with only those rights set forth herein. 
 *
 * Any use of this source code in individual and commercial software must 
 * include, in the user documentation and internal comments to the code,
 * the above Disclaimer and U.S. Government End Users Notice.
 *
 *  Module name              : $RCSfile: ptxomp.c,v $   $Revision: 1.13 $
 *
 *  Last update              : $Date: 2004/11/15 18:29:53 $ UTC
 *
 *  Description              :
 * 
 *          This module defines the cuda compiled device code format
 *          that is able to hold a collection of compiled binaries corresponding
 *          with the same GPU source code: load images (cubins) for zero or more
 *          nVidia GPU architectures, plus PTX intermediate representations, possibly
 *          tuned for zero or more GPU architectures.
 * 
 *          It is the purpose of the cuda compilation driver (nvcc) to generate 
 *          __cudaFatCudaBinaries with actual contents depending on nvcc command line options
 *          selected by the user.
 *
 *          During runtime, the cuda driver can use function fatGetCubinForGpu for 
 *          retrieving an appropriate cubin load image, depending on the GPU
 *          architecture that is to run the device code. 
 *          fatGetCubinForGpu might employ any combination of the following strategies
 *          for retrieving an appropriate cubin:
 *
 *          - Select a matching one that is directly available in the __cudaFatCudaBinary.
 *          - Locate it in a resource file, or resource directory corresponding with the
 *            current executable. For this purpose, the __cudaFatCudaBinary provides a 'key' 
 *            string that can be considered as a unique name of the device code from 
 *            which __cudaFatCudaBinary was compiled.
 *          - Dynamically recompile an appropriate ptx intermediate representation for
 *            the current GPU architecture. The result of such compilation can be stored
 *            into a global CUDA compilation cache on the current computer platform.
 *           
 *          Note that the above allows for __cudaFatCudaBinaries that hold a large collection
 *          of chip specific cubins (which make the executable quite self contained),
 *          as well as completely empty __cudaFatCudaBinaries, with cubin or ptx intermediates
 *          located in resource files via the key string.
 *
 *          Note: This module needs to be completed with parameters that influence its
 *                recompilation strategy.
 */

#ifndef __cudaFatFormat_INCLUDED
#define __cudaFatFormat_INCLUDED

/*--------------------------------- Includes ---------------------------------*/



#ifdef __cplusplus
extern "C" {
#endif

/*----------------------------------- Types ----------------------------------*/

/*
 * Cubin entry type for __cudaFat binary. 
 * Cubins are specific to a particular gpu profile,
 * although the gpuInfo module might 'know'
 * that cubins will also run on other gpus.
 * Based on the recompilation strategy, 
 * fatGetCubinForGpu will return an existing
 * compatible load image, or attempt a recompilation.
 */
typedef struct {
    /* Name of the gpu profile this cubin is specific to. */
    char *gpuProfileName;
    /* The cubin load image, held as a text string. */
    char *cubin;
} __cudaFatCubinEntry;


/*
 * Ptx entry type for __cudaFat binary.
 * PTX might use particular chip features
 * (such as double precision floating points).
 * When attempting to recompile for a certain 
 * gpu architecture, a ptx needs to be available
 * that depends on features that are either 
 * implemented by the gpu, or for which the ptx
 * translator can provide an emulation. 
 */
typedef struct {
    /* Name of the gpu profile this ptx is associated with. */
    char *gpuProfileName;
    /* The ptx intermediate representation. */
    char *ptx;
} __cudaFatPtxEntry;


/*
 * Debug entry type for __cudaFat binary.
 * Such information might, but need not be available
 * for Cubin entries (ptx files compiled in debug mode
 * will contain their own debugging information) 
 */
typedef struct {
    /* Name of the gpu profile this debug entry belongs to. */
    char *gpuProfileName;
    /*
     * Debug information for the matching cubin entry.
     * NOTE(review): presumably the name of a file holding the debug
     * information (cf. the dbgInfoFile output of fatGetCubinForGpu) --
     * confirm against the producer of this structure.
     */
    char *debug;
} __cudaFatDebugEntry;


/*
 * Single-bit flag values (bit 0 and bit 1), so they can be OR-ed together.
 * NOTE(review): presumably stored in __cudaFatCudaBinary.flags; the exact
 * effect of each flag is not described in this header -- confirm with the
 * implementation of fatGetCubinForGpu.
 */
typedef enum {
    __cudaFatDontSearchFlag = 0x1,
    __cudaFatDontCacheFlag  = 0x2
} __cudaFatCudaBinaryFlag;

/*
 * Fat binary container.
 * A mix of ptx intermediate programs and cubins,
 * plus a global identifier that can be used for 
 * further lookup in a translation cache or a resource
 * file. This key is a checksum over the device text.
 * The ptx and cubin arrays are each terminated with 
 * entries that have Null components.
 */
typedef struct {
    /*
     * Header words.
     * NOTE(review): presumably magic == __cudaFatMAGIC and
     * version == __cudaFatVERSION -- confirm against the producer.
     * NOTE(review): the width of 'unsigned long' differs between data
     * models (e.g. LP64 vs LLP64), so this layout is platform dependent.
     */
    unsigned long     magic;
    unsigned long     version;
    /* NOTE(review): presumably the version of the gpuInfo module -- confirm. */
    unsigned long     gpuInfoVersion;
    /*
     * Global identifier of the device code (a checksum over the device
     * text); usable for lookup in a translation cache or resource file.
     */
    char*            key;
    /* NOTE(review): meaning of ident and usageMode is not described in this header. */
    char*            ident;
    char*            usageMode;
    /* Array of ptx entries, terminated by an entry with Null components. */
    __cudaFatPtxEntry      *ptx;
    /* Array of cubin entries, terminated by an entry with Null components. */
    __cudaFatCubinEntry    *cubin;
    /*
     * Debug entries (present since format version 1).
     * NOTE(review): presumably terminated like the ptx/cubin arrays -- confirm.
     */
    __cudaFatDebugEntry    *debug;
    /* Added in format version 2; opaque at this level. */
    void*           debugInfo;
    /*
     * Added in format version 2.
     * NOTE(review): presumably a bitwise OR of __cudaFatCudaBinaryFlag values -- confirm.
     */
    unsigned int            flags;
} __cudaFatCudaBinary;


/*
 * Current version and magic numbers.
 *
 * __cudaFatVERSION is the current format version; per the version history
 * log below, version 2 is the layout in which the flags and debugInfo
 * fields were added to __cudaFatCudaBinary.
 *
 * NOTE(review): presumably these are the values stored in the 'version'
 * and 'magic' fields of __cudaFatCudaBinary -- confirm against the producer.
 */
#define __cudaFatVERSION   0x00000002
#define __cudaFatMAGIC     0x1ee55a01

/*
 * Version history log:
 *    1  : __cudaFatDebugEntry field added to __cudaFatCudaBinary struct
 *    2  : flags and debugInfo fields added.
 */


/*--------------------------------- Functions --------------------------------*/

/*
 * Function        : Select a load image from the __cudaFat binary
 *                   that will run on the specified GPU.
 * Parameters      : binary      (I) Fat binary
 *                   gpuName     (I) Name of target GPU
 *                   cubin       (O) Returned cubin text string, or Null when 
 *                                   no matching cubin for the specified gpu
 *                                   could be found.
 *                   dbgInfoFile (O) If this parameter is not Null upon entry, then
 *                                   the name of a file containing debug information
 *                                   on the returned cubin will be returned, or Null 
 *                                   will be returned when cubin or such debug info 
 *                                   cannot be found.
 * Function Result : None; results are delivered through cubin and dbgInfoFile.
 *
 * NOTE(review): ownership and lifetime of the returned strings are not
 *               specified in this header -- confirm with the implementation
 *               before freeing or retaining them.
 */
void fatGetCubinForGpu( __cudaFatCudaBinary *binary, char* gpuName, char* *cubin, char* *dbgInfoFile );


#ifdef __cplusplus
}
#endif

#endif