//
//
// File author(s): <Gabriel Rucabado and Antonio Garcia Dopico>, (C) 2014
//
// Copyright: this software is licenced under the terms stipulated in the license.txt file located in its root directory
//
//
#ifndef _CUDAEXCEPTION
#define _CUDAEXCEPTION
#include <string>
#include <sstream>
#include <exception>
// Includes from cuda
#include "cuda_runtime.h"
#include <cublas_v2.h>
#include <cusparse_v2.h>
//
// DEFINITIONS
//
#define CUDA_ID_DEF 0
#define OK_CUSPARSE CUSPARSE_STATUS_SUCCESS
#define OK_CUBLAS CUBLAS_STATUS_SUCCESS
#define OK_CUDA cudaSuccess
//#######################
//#### CONFIGURATION ####
//#######################
#define NUM_TH_XAXPY 512 //<=1024 (&& ^2 with VER32)
#define NUM_TH_REDUCE 256 //<=1024 (&& ^2 with VER32)
#define NUM_TH_VECTOR_OP 512 //<=1024 (&& ^2 with VER32)
#define NUM_TH_MATRIX_OP 512 //<=1024 (&& ^2 with VER32)
#define NUM_LEC_TH 4 //
#define MIN_MAJOR 2 // xa multiGPU
#define MIN_MINOR 0 // xa multiGPU
#define TH_HH 25
#define TH_BRANCH 32
#define TH_TERM 2
#define TH_ALL 64
//
// MACROS
//
#define DIM3(var, Vx, Vy, Vz) \
var.x = Vx; \
var.y = Vy; \
var.z = Vz
#if DEBUG >= DEBUG_MESS_LVL
# define _DEBUG_CUDA(fmt, args...) \
printf (DEBUG_COLOR "DEBUG_CUDA: Grid<%05d-%05d-%05d> Block<%05d-%05d-%05d>" \
" [%s -.- %s:%05d] ::: "fmt DEFAULT"\n", \
blockIdx.x, blockIdx.y, blockIdx.z, \
threadIdx.x, threadIdx.y, threadIdx.z, \
__BASE_FILE__, __FUNCTION__, __LINE__, ##args);
#else
# define _DEBUG_CUDA(d,f...)
#endif
# define _INFO_CUDA(fmt, args...) \
printf (INFO_COLOR "INFO_CUDA: Grid<%05d-%05d-%05d> Block<%05d-%05d-%05d>" \
" [%s -.- %s:%05d] ::: "fmt DEFAULT "\n", \
blockIdx.x, blockIdx.y, blockIdx.z, \
threadIdx.x, threadIdx.y, threadIdx.z, \
__BASE_FILE__, __FUNCTION__, __LINE__, ##args);
# define _ERROR_CUDA(fmt, args...) \
fprintf (stderr, ERROR_COLOR "ERROR_CUDA: Grid<%05d-%05d-%05d> Block<%05d-%05d-%05d>" \
" [%s -.- %s:%05d] ::: "fmt DEFAULT"\n", \
blockIdx.x, blockIdx.y, blockIdx.z, \
threadIdx.x, threadIdx.y, threadIdx.z, \
__BASE_FILE__, __FUNCTION__, __LINE__, ##args);
# define CUDA_ALLOC(ptr, size) \
{ \
cudaError_t error; \
std::stringstream params; \
if (( error = cudaMalloc ((void**)ptr, size)) != OK_CUDA){ \
params << "ptr=" << ptr << " size=" << size; \
throw cudaException(__BASE_FILE__, __FUNCTION__, __LINE__, \
"When allocating the memory", error, params.str()); \
} \
}
#if DEBUG >= DEBUG_KERNEL_LVL
#define CUDA_EVENT_RECORD(ev, st) \
{ \
cudaError_t error; \
if ((error = cudaEventRecord(ev, st)) != OK_CUDA){ \
std::stringstream params; \
params << "event=" << ev << " stream=" << st; \
throw cudaException(__BASE_FILE__, __FUNCTION__, __LINE__, \
"Tracking event updateHH<->updateMat", \
error, params.str()); \
} \
}
#define CUDA_EVENT_WAIT(ev, st) \
{ \
cudaError_t error; \
if ((error = cudaStreamWaitEvent (st, ev, 0)) != OK_CUDA){ \
std::stringstream params; \
params << "event=" << ev << " stream=" << st; \
throw cudaException(__BASE_FILE__, __FUNCTION__, __LINE__, \
"Synchronizing updateHH<->updateMat", \
error, params.str()); \
} \
}
#define CUDA_EVENT_SYNC(ev) \
{ \
cudaError_t error; \
if ((error = cudaEventSynchronize (ev)) != OK_CUDA){ \
std::stringstream params; \
params << "event=" << ev; \
throw cudaException(__BASE_FILE__, __FUNCTION__, __LINE__, \
"Synchronizing updateHH<->updateMat", \
error, params.str()); \
} \
}
#else
#define CUDA_EVENT_RECORD(ev, st) \
{ \
cudaEventRecord(ev, st); \
}
#define CUDA_EVENT_WAIT(ev, st) \
{ \
cudaStreamWaitEvent (st, ev, 0); \
}
#define CUDA_EVENT_SYNC(ev) \
{ \
cudaEventSynchronize (ev); \
}
#endif
#define CUDA_CPY_SYNC(tar, src, len) \
{ \
cudaError_t error; \
std::stringstream params; \
error = cudaMemcpy (tar, src, len, cudaMemcpyDefault); \
if (error != OK_CUDA){ \
params << "src=" << src << " tar=" << tar << " len=" << len; \
throw cudaException(__BASE_FILE__, __FUNCTION__, __LINE__, \
"Doing the memory transfer", \
error, params.str()); \
} \
}
#define CUDA_CPY(tar, src, len, st) \
{ \
cudaError_t error; \
std::stringstream params; \
if ((error = cudaMemcpyAsync(tar, src, len, cudaMemcpyDefault, st)) != OK_CUDA){ \
params << "src=" << src << " tar=" << tar << " len=" << len; \
throw cudaException(__BASE_FILE__, __FUNCTION__, __LINE__, \
"Doing the memory transfer", \
error, params.str()); \
} \
}
#define CUDA_SET(tar, val, len, st) \
{ \
cudaError_t error; \
std::stringstream params; \
if ((error = cudaMemsetAsync(tar, val, len, st)) != OK_CUDA){ \
params << " tar=" << tar << " len=" << len; \
throw cudaException(__BASE_FILE__, __FUNCTION__, __LINE__, \
"Doing the memory transfer", \
error, params.str()); \
} \
}
#define CUDA_FREE(ptr) \
{ \
cudaError_t error; \
std::stringstream params; \
if ((error = cudaFree(ptr)) != OK_CUDA){ \
params << "ptr=" << ptr; \
throw cudaException(__BASE_FILE__, __FUNCTION__, __LINE__, \
"When the memory release", error, params.str()); \
} \
}
#if TYPE_ID == FLOAT_ID
#define cusparseXcsrmv cusparseScsrmv
#define cublasXgemv cublasSgemv
#define cublasXot cublasSdot
#define cublasXaxpy cublasSaxpy
#define cublasXnorm cublasSnrm2
#elif TYPE_ID == DOUBLE_ID
#define cusparseXcsrmv cusparseDcsrmv
#define cublasXgemv cublasDgemv
#define cublasXot cublasDdot
#define cublasXaxpy cublasDaxpy
#define cublasXnorm cublasDnrm2
#else
#error "The variable TYPE_ID is not defined"
#endif
#if DEBUG >= DEBUG_KERNEL_LVL
#define KERNEL(func, dimG, dimB, stream, fmt, args...) \
_DEBUG(" Executing the kernel <%s> con:", #func); \
_DEBUG(" -Block mesh: [%d,%d,%d] ==> Total: %d", \
dimG.x, dimG.y, dimG.z, dimG.x * dimG.y * dimG.z); \
_DEBUG(" -Threads mesh: [%d,%d,%d] ==> Total: %d", \
dimB.x, dimB.y, dimB.z, dimB.x * dimB.y * dimB.z); \
_DEBUG(" -Param: "#fmt, args); \
func <<<dimG, dimB, 0, stream>>> (args); \
checkError(#func, __BASE_FILE__, __LINE__)
#define KERNEL1(func, t1, dimG, dimB, stream, fmt, args...) \
_DEBUG("Executing the kernel <%s> con:", #func); \
_DEBUG(" -Block mesh: [%d,%d,%d] ==> Total: %d", \
dimG.x, dimG.y, dimG.z, dimG.x * dimG.y * dimG.z); \
_DEBUG(" -Threads mesh: [%d,%d,%d] ==> Total: %d", \
dimB.x, dimB.y, dimB.z, dimB.x * dimB.y * dimB.z); \
_DEBUG(" -Template: %s=%d", #t1, t1); \
_DEBUG(" -Param: "#fmt, args); \
func <t1> <<<dimG, dimB, 0, stream>>> (args); \
checkError(#func, __BASE_FILE__, __LINE__)
#else
#define KERNEL(func, dimG, dimB, stream, fmt, args...) \
func <<<dimG, dimB, 0, stream>>> (args)
#define KERNEL1(func, t1, dimG, dimB, stream, fmt, args...) \
func <t1> <<<dimG, dimB, 0, stream>>> (args)
#endif
#define TEXT_CUBLAS(x) (x==CUBLAS_STATUS_SUCCESS)? "OK": \
(x==CUBLAS_STATUS_NOT_INITIALIZED)? "NOT INITIALIZED": \
(x==CUBLAS_STATUS_ALLOC_FAILED)? "ALLOC FAILED": \
(x==CUBLAS_STATUS_INVALID_VALUE)? "INVALID VALUE": \
(x==CUBLAS_STATUS_ARCH_MISMATCH)? "ARCH MISMATCH": \
(x==CUBLAS_STATUS_MAPPING_ERROR)? "MAPPING FAILED": \
(x==CUBLAS_STATUS_EXECUTION_FAILED)? "EXECUTION FAILED": \
(x==CUBLAS_STATUS_INTERNAL_ERROR)? "INTERNAL ERROR":"OTHER"
#define TEXT_CUSPARSE(x) (x==CUSPARSE_STATUS_SUCCESS)? "OK": \
(x==CUSPARSE_STATUS_NOT_INITIALIZED)? "NOT INITIALIZED": \
(x==CUSPARSE_STATUS_ALLOC_FAILED)? "ALLOC FAILED": \
(x==CUSPARSE_STATUS_INVALID_VALUE)? "INVALID VALUE": \
(x==CUSPARSE_STATUS_ARCH_MISMATCH)? "ARCH MISMATCH": \
(x==CUSPARSE_STATUS_MAPPING_ERROR)? "MAPPING FAILED": \
(x==CUSPARSE_STATUS_EXECUTION_FAILED)? "EXECUTION FAILED": \
(x==CUSPARSE_STATUS_INTERNAL_ERROR)? "INTERNAL ERROR": \
(x==CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED)? "MATRIX TYPE NOT SUPPORTED":"OTHER"
#define TEXT_CUDA(x) (x==cudaSuccess)? "OK": \
(x==cudaErrorMissingConfiguration)?"cudaErrorMissingConfiguration" : \
(x==cudaErrorMemoryAllocation)?"cudaErrorMemoryAllocation" : \
(x==cudaErrorInitializationError)?"cudaErrorInitializationError" : \
(x==cudaErrorLaunchFailure)?"cudaErrorLaunchFailure" : \
(x==cudaErrorPriorLaunchFailure)?"cudaErrorPriorLaunchFailure" : \
(x==cudaErrorLaunchTimeout)?"cudaErrorLaunchTimeout" : \
(x==cudaErrorLaunchOutOfResources)?"cudaErrorLaunchOutOfResourc" : \
(x==cudaErrorInvalidDeviceFunction)?"cudaErrorInvalidDeviceFunction" : \
(x==cudaErrorUnmapBufferObjectFailed)?"cudaErrorUnmapBufferObjectFailed" : \
(x==cudaErrorInvalidDevice)?"cudaErrorInvalidDevice" : \
(x==cudaErrorInvalidValue)?"cudaErrorInvalidValue" : \
(x==cudaErrorInvalidPitchValue)?"cudaErrorInvalidPitchValue" : \
(x==cudaErrorInvalidSymbol)?"cudaErrorInvalidSymbol" : \
(x==cudaErrorMapBufferObjectFailed)?"cudaErrorMapBufferObjectFailed" : \
(x==cudaErrorUnmapBufferObjectFailed)?"cudaErrorUnmapBufferObjectFailed" : \
(x==cudaErrorInvalidHostPointer)?"cudaErrorInvalidHostPointer" : \
(x==cudaErrorInvalidDevicePointer)?"cudaErrorInvalidDevicePointer" : \
(x==cudaErrorInvalidTexture)?"cudaErrorInvalidTexture" : \
(x==cudaErrorInvalidTextureBinding)?"cudaErrorInvalidTextureBinding" : \
(x==cudaErrorInvalidChannelDescriptor)?"cudaErrorInvalidChannelDescriptor" : \
(x==cudaErrorInvalidMemcpyDirection)?"cudaErrorInvalidMemcpyDirection" : \
(x==cudaErrorAddressOfConstant)?"cudaErrorAddressOfConstant" : \
(x==cudaErrorTextureFetchFailed)?"cudaErrorTextureFetchFailed" : \
(x==cudaErrorTextureNotBound)?"cudaErrorTextureNotBound" : \
(x==cudaErrorSynchronizationError)?"cudaErrorSynchronizationError" : \
(x==cudaErrorInvalidFilterSetting)?"cudaErrorInvalidFilterSetting" : \
(x==cudaErrorInvalidNormSetting)?"cudaErrorInvalidNormSetting" : \
(x==cudaErrorMixedDeviceExecution)?"cudaErrorMixedDeviceExecution" : \
(x==cudaErrorCudartUnloading)?"cudaErrorCudartUnloading" : \
(x==cudaErrorUnknown)?"cudaErrorUnknown" : \
(x==cudaErrorNotYetImplemented)?"cudaErrorNotYetImplemented" : \
(x==cudaErrorMemoryValueTooLarge)? "cudaErrorMemoryValueTooLarge": \
(x==cudaErrorInvalidResourceHandle)?"cudaErrorInvalidResourceHandle": \
(x==cudaErrorNotReady)?"cudaErrorNotReady": \
(x==cudaErrorInsufficientDriver)?"cudaErrorInsufficientDriver" : \
(x==cudaErrorSetOnActiveProcess)?"cudaErrorSetOnActiveProcess" : \
(x==cudaErrorInvalidSurface)?"cudaErrorInvalidSurface" : \
(x==cudaErrorNoDevice)?"cudaErrorNoDevice" : \
(x==cudaErrorECCUncorrectable)?"cudaErrorECCUncorrectable" : \
(x==cudaErrorSharedObjectSymbolNotFound)?"cudaErrorSharedObjectSymbolNotFound" : \
(x==cudaErrorSharedObjectInitFailed)?"cudaErrorSharedObjectInitFailed" : \
(x==cudaErrorUnsupportedLimit)?"cudaErrorUnsupportedLimit" : \
(x==cudaErrorDuplicateVariableName)?"cudaErrorDuplicateVariableName" : \
(x==cudaErrorDuplicateTextureName)?"cudaErrorDuplicateTextureName" : \
(x==cudaErrorDuplicateSurfaceName)? "cudaErrorDuplicateSurfaceName": \
(x==cudaErrorDevicesUnavailable)?"cudaErrorDevicesUnavailable" : \
(x==cudaErrorInvalidKernelImage)?"cudaErrorInvalidKernelImage" : \
(x==cudaErrorNoKernelImageForDevice)?"cudaErrorNoKernelImageForDevice" : \
(x==cudaErrorIncompatibleDriverContext)?"cudaErrorIncompatibleDriverContext" : \
(x==cudaErrorPeerAccessAlreadyEnabled)?"cudaErrorPeerAccessAlreadyEnabled" : \
(x==cudaErrorPeerAccessNotEnabled)?"cudaErrorPeerAccessNotEnabled" : \
(x==cudaErrorDeviceAlreadyInUse)?"cudaErrorDeviceAlreadyInUse" : \
(x==cudaErrorProfilerDisabled)?"cudaErrorProfilerDisabled" : \
(x==cudaErrorProfilerNotInitialized)?"cudaErrorProfilerNotInitialized" : \
(x==cudaErrorProfilerAlreadyStarted)?"cudaErrorProfilerAlreadyStarted" : \
(x==cudaErrorProfilerAlreadyStopped)?"cudaErrorProfilerAlreadyStopped" : \
(x==cudaErrorStartupFailure)?"cudaErrorStartupFailure" : \
(x==cudaErrorApiFailureBase)?"cudaErrorApiFailureBase" : "other"
class cudaException : public std::exception {
private:
std::string msg;
std::string type;
std::string desc;
std::string param;
char * file;
const char * func;
int line;
public:
cudaException(char *file, const char *func, int line,
const std::string& msg, cudaError_t error,
const std::string& params);
cudaException(char *file, const char *func, int line,
const std::string& msg, cublasStatus_t error,
const std::string& params);
cudaException(char *file, const char *func, int line,
const std::string& msg, cusparseStatus_t error,
const std::string& params);
virtual const char* what() const throw();
void print() throw();
std::string get_type() throw();
std::string get_desc() throw();
std::string get_param() throw();
virtual ~cudaException() throw();
};
extern void checkError(const char *func, char *file, const int line);
#endif