From 633310c0d69478dc39027696751e874d4bc27862 Mon Sep 17 00:00:00 2001 From: selaselah Date: Sat, 23 Sep 2017 00:43:16 +0800 Subject: [PATCH] [src] Fix CU_SAFE_CALL wrapper so it correctly prints CuBLAS error codes (#1900) --- src/cudamatrix/cu-common.cc | 17 +++++++++++++++++ src/cudamatrix/cu-common.h | 12 ++++++++++-- src/cudamatrix/cu-device.cc | 2 +- src/cudamatrix/cu-matrix.cc | 27 ++++++++++++++------------- src/cudamatrix/cu-packed-matrix.cc | 2 +- src/cudamatrix/cu-sp-matrix.cc | 4 ++-- src/cudamatrix/cu-tp-matrix.cc | 2 +- src/cudamatrix/cu-vector.cc | 24 ++++++++++++------------ 8 files changed, 58 insertions(+), 32 deletions(-) diff --git a/src/cudamatrix/cu-common.cc b/src/cudamatrix/cu-common.cc index dc88b1e16..89de8c62e 100644 --- a/src/cudamatrix/cu-common.cc +++ b/src/cudamatrix/cu-common.cc @@ -67,6 +67,23 @@ void GetBlockSizesForSimpleMatrixOperation(int32 num_rows, "Matrix has too many rows to process"); dimGrid->z = 1; } + +const char* cublasGetStatusString(cublasStatus_t status) { + switch(status) { + case CUBLAS_STATUS_SUCCESS: return "CUBLAS_STATUS_SUCCESS"; + case CUBLAS_STATUS_NOT_INITIALIZED: return "CUBLAS_STATUS_NOT_INITIALIZED"; + case CUBLAS_STATUS_ALLOC_FAILED: return "CUBLAS_STATUS_ALLOC_FAILED"; + case CUBLAS_STATUS_INVALID_VALUE: return "CUBLAS_STATUS_INVALID_VALUE"; + case CUBLAS_STATUS_ARCH_MISMATCH: return "CUBLAS_STATUS_ARCH_MISMATCH"; + case CUBLAS_STATUS_MAPPING_ERROR: return "CUBLAS_STATUS_MAPPING_ERROR"; + case CUBLAS_STATUS_EXECUTION_FAILED: return "CUBLAS_STATUS_EXECUTION_FAILED"; + case CUBLAS_STATUS_INTERNAL_ERROR: return "CUBLAS_STATUS_INTERNAL_ERROR"; + case CUBLAS_STATUS_NOT_SUPPORTED: return "CUBLAS_STATUS_NOT_SUPPORTED"; + case CUBLAS_STATUS_LICENSE_ERROR: return "CUBLAS_STATUS_LICENSE_ERROR"; + } + return "CUBLAS_STATUS_UNKNOWN_ERROR"; +} + #endif } // namespace diff --git a/src/cudamatrix/cu-common.h b/src/cudamatrix/cu-common.h index f151a01a9..10e70bc7d 100644 --- a/src/cudamatrix/cu-common.h +++ b/src/cudamatrix/cu-common.h @@ -33,8 +33,6 @@ #include #include - - #define CU_SAFE_CALL(fun) \ { \ int32 ret; \ @@ -43,6 +41,14 @@ } \ } +#define CUBLAS_SAFE_CALL(fun) \ +{ \ + int32 ret; \ + if ((ret = (fun)) != 0) { \ + KALDI_ERR << "cublasStatus_t " << ret << " : \"" << cublasGetStatusString((cublasStatus_t)ret) << "\" returned from '" << #fun << "'"; \ + } \ +} + #define KALDI_CUDA_ERR(ret, msg) \ { \ if (ret != 0) { \ @@ -75,6 +81,8 @@ void GetBlockSizesForSimpleMatrixOperation(int32 num_rows, dim3 *dimGrid, dim3 *dimBlock); +/** This is analogous to the CUDA function cudaGetErrorString(). **/ +const char* cublasGetStatusString(cublasStatus_t status); } diff --git a/src/cudamatrix/cu-device.cc b/src/cudamatrix/cu-device.cc index e54b57530..92ef0c02d 100644 --- a/src/cudamatrix/cu-device.cc +++ b/src/cudamatrix/cu-device.cc @@ -214,7 +214,7 @@ void CuDevice::FinalizeActiveGpu() { // Remember the id of active GPU active_gpu_id_ = act_gpu_id; // CuDevice::Enabled() is true from now on // Initialize the CUBLAS - CU_SAFE_CALL(cublasCreate(&handle_)); + CUBLAS_SAFE_CALL(cublasCreate(&handle_)); // Notify user which GPU is finally used char name[128]; diff --git a/src/cudamatrix/cu-matrix.cc b/src/cudamatrix/cu-matrix.cc index 30a79b5c9..5860c1938 100644 --- a/src/cudamatrix/cu-matrix.cc +++ b/src/cudamatrix/cu-matrix.cc @@ -1161,7 +1161,7 @@ void CuMatrixBase::AddMatMat( #if HAVE_CUDA == 1 if (CuDevice::Instantiate().Enabled()) { CuTimer tim; - CU_SAFE_CALL(cublas_gemm(GetCublasHandle(), + CUBLAS_SAFE_CALL(cublas_gemm(GetCublasHandle(), (transB==kTrans? CUBLAS_OP_T:CUBLAS_OP_N), (transA==kTrans? CUBLAS_OP_T:CUBLAS_OP_N), m, n, k, alpha, B.data_, B.Stride(), @@ -1188,8 +1188,8 @@ void CuMatrixBase::AddVecVec( #if HAVE_CUDA == 1 if (CuDevice::Instantiate().Enabled()) { CuTimer tim; - CU_SAFE_CALL(cublas_ger(GetCublasHandle(), m, n, alpha, - y.Data(), 1, x.Data(), 1, data_, Stride())); + CUBLAS_SAFE_CALL(cublas_ger(GetCublasHandle(), m, n, alpha, + y.Data(), 1, x.Data(), 1, data_, Stride())); CuDevice::Instantiate().AccuProfile(__func__, tim); } else @@ -1215,9 +1215,10 @@ void CuMatrixBase::SymAddMat2( CuTimer tim; cublasOperation_t trans = (transA == kTrans ? CUBLAS_OP_N : CUBLAS_OP_T); MatrixIndexT A_other_dim = (transA == kNoTrans ? A.num_cols_ : A.num_rows_); - CU_SAFE_CALL(cublas_syrk(GetCublasHandle(), CUBLAS_FILL_MODE_UPPER, trans, - num_rows_, A_other_dim, alpha, A.Data(), A.Stride(), - beta, this->data_, this->stride_)); + CUBLAS_SAFE_CALL(cublas_syrk(GetCublasHandle(), CUBLAS_FILL_MODE_UPPER, + trans, num_rows_, A_other_dim, + alpha, A.Data(), A.Stride(), + beta, this->data_, this->stride_)); CuDevice::Instantiate().AccuProfile(__func__, tim); } else @@ -2106,13 +2107,13 @@ void AddMatMatBatched(const Real alpha, std::vector* > &C, CU_SAFE_CALL(cudaMemcpy(device_abc_array, host_abc_array, 3*size*sizeof(Real*), cudaMemcpyHostToDevice)); - CU_SAFE_CALL(cublas_gemmBatched(GetCublasHandle(), - (transB==kTrans? CUBLAS_OP_T:CUBLAS_OP_N), - (transA==kTrans? CUBLAS_OP_T:CUBLAS_OP_N), - m, n, k, alpha, device_b_array, - B[0]->Stride(), device_a_array, - A[0]->Stride(), beta, device_c_array, - C[0]->Stride(), size)); + CUBLAS_SAFE_CALL(cublas_gemmBatched(GetCublasHandle(), + (transB==kTrans? CUBLAS_OP_T:CUBLAS_OP_N), + (transA==kTrans? CUBLAS_OP_T:CUBLAS_OP_N), + m, n, k, alpha, device_b_array, + B[0]->Stride(), device_a_array, + A[0]->Stride(), beta, device_c_array, + C[0]->Stride(), size)); CuDevice::Instantiate().Free(device_abc_array); delete[] host_abc_array; diff --git a/src/cudamatrix/cu-packed-matrix.cc b/src/cudamatrix/cu-packed-matrix.cc index 66d7e4102..eeccae94f 100644 --- a/src/cudamatrix/cu-packed-matrix.cc +++ b/src/cudamatrix/cu-packed-matrix.cc @@ -298,7 +298,7 @@ void CuPackedMatrix::Scale(Real alpha) { CuTimer tim; size_t nr = static_cast(num_rows_), num_elements = ((nr * (nr+1)) / 2); - CU_SAFE_CALL(cublas_scal(GetCublasHandle(), num_elements, alpha, data_, 1)); + CUBLAS_SAFE_CALL(cublas_scal(GetCublasHandle(), num_elements, alpha, data_, 1)); CuDevice::Instantiate().AccuProfile("CuPackedMatrix::Scale", tim); } else diff --git a/src/cudamatrix/cu-sp-matrix.cc b/src/cudamatrix/cu-sp-matrix.cc index 4d6905c44..1360c7b4f 100644 --- a/src/cudamatrix/cu-sp-matrix.cc +++ b/src/cudamatrix/cu-sp-matrix.cc @@ -115,8 +115,8 @@ void CuSpMatrix::AddVec2(const Real alpha, const CuVectorBase &v) { dim3 dimBlock(CU2DBLOCK, CU2DBLOCK); dim3 dimGrid(n_blocks(nr, CU2DBLOCK), n_blocks(nr, CU2DBLOCK)); - CU_SAFE_CALL(cublas_spr(GetCublasHandle(), CUBLAS_FILL_MODE_UPPER, this->num_rows_, alpha, v.Data(), - 1, this->Data())); + CUBLAS_SAFE_CALL(cublas_spr(GetCublasHandle(), CUBLAS_FILL_MODE_UPPER, this->num_rows_, alpha, v.Data(), + 1, this->Data())); CuDevice::Instantiate().AccuProfile("CuSpMatrix::AddVec2", tim); } else diff --git a/src/cudamatrix/cu-tp-matrix.cc b/src/cudamatrix/cu-tp-matrix.cc index cc575e4a0..377c34239 100644 --- a/src/cudamatrix/cu-tp-matrix.cc +++ b/src/cudamatrix/cu-tp-matrix.cc @@ -74,7 +74,7 @@ void CuTpMatrix::Invert() { CU_SAFE_CALL(cudaGetLastError()); CuMatrix tmp2(dim, dim); tmp2.CopyFromTp(*this); - CU_SAFE_CALL(cublas_trsm(GetCublasHandle(), dim, dim, alpha, tmp2.Data(), tmp2.Dim().stride, + CUBLAS_SAFE_CALL(cublas_trsm(GetCublasHandle(), dim, dim, alpha, tmp2.Data(), tmp2.Dim().stride, tmp.Data(), tmp.Dim().stride)); this->CopyFromMat(tmp, kNoTrans); CuDevice::Instantiate().AccuProfile(__func__, tim); diff --git a/src/cudamatrix/cu-vector.cc b/src/cudamatrix/cu-vector.cc index 595fd0aaa..143bd5115 100644 --- a/src/cudamatrix/cu-vector.cc +++ b/src/cudamatrix/cu-vector.cc @@ -49,8 +49,8 @@ Real VecVec(const CuVectorBase &a, #if HAVE_CUDA == 1 if (CuDevice::Instantiate().Enabled()) { CuTimer tim; - CU_SAFE_CALL(cublas_dot(GetCublasHandle(), a.Dim(), a.Data(), 1, b.Data(), - 1, &result)); + CUBLAS_SAFE_CALL(cublas_dot(GetCublasHandle(), a.Dim(), a.Data(), 1, b.Data(), + 1, &result)); CuDevice::Instantiate().AccuProfile(__func__, tim); } else #endif @@ -451,10 +451,10 @@ void CuVectorBase::AddMatVec(const Real alpha, // Everything is backwards in CuBlas. We need to reverse rows, columns, // transpose-ness. - CU_SAFE_CALL(cublas_gemv(GetCublasHandle(), - (trans==kTrans? CUBLAS_OP_N:CUBLAS_OP_T), - M.NumCols(), M.NumRows(), alpha, M.Data(), - M.Stride(), v.Data(), 1, beta, data_, 1)); + CUBLAS_SAFE_CALL(cublas_gemv(GetCublasHandle(), + (trans==kTrans? CUBLAS_OP_N:CUBLAS_OP_T), + M.NumCols(), M.NumRows(), alpha, M.Data(), + M.Stride(), v.Data(), 1, beta, data_, 1)); CuDevice::Instantiate().AccuProfile(__func__, tim); } else @@ -478,8 +478,8 @@ void CuVectorBase::AddSpVec(const Real alpha, // Note: in our opinion the CuSpMatrix represents a lower-triangular matrix, but // in CUBLAS, for some stupid reason, everything is reversed. - CU_SAFE_CALL(cublas_spmv(GetCublasHandle(), CUBLAS_FILL_MODE_UPPER, Dim(), - alpha, M.Data(), v.Data(), 1, beta, data_, 1)); + CUBLAS_SAFE_CALL(cublas_spmv(GetCublasHandle(), CUBLAS_FILL_MODE_UPPER, Dim(), + alpha, M.Data(), v.Data(), 1, beta, data_, 1)); CuDevice::Instantiate().AccuProfile(__func__, tim); } else @@ -775,7 +775,7 @@ void CuVectorBase::CopyFromVec(const CuVectorBase &src) { if (CuDevice::Instantiate().Enabled()) { if (dim_ == 0) return; CuTimer tim; - CU_SAFE_CALL(cublas_copy(GetCublasHandle(), dim_, src.Data(), 1, data_, 1)); + CUBLAS_SAFE_CALL(cublas_copy(GetCublasHandle(), dim_, src.Data(), 1, data_, 1)); CuDevice::Instantiate().AccuProfile(__func__, tim); } else #endif @@ -792,7 +792,7 @@ void CuVectorBase::CopyFromVec(const CuVectorBase &src) { if (CuDevice::Instantiate().Enabled()) { if (dim_ == 0) return; CuTimer tim; - CU_SAFE_CALL(cublas_copy(GetCublasHandle(), dim_, src.Data(), 1, data_, 1)); + CUBLAS_SAFE_CALL(cublas_copy(GetCublasHandle(), dim_, src.Data(), 1, data_, 1)); CuDevice::Instantiate().AccuProfile(__func__, tim); } else #endif @@ -1089,8 +1089,8 @@ void CuVectorBase::CopyDiagFromMat(const CuMatrix &M) { if (CuDevice::Instantiate().Enabled()) { KALDI_ASSERT(dim_ == std::min(M.NumRows(), M.NumCols())); CuTimer tim; - CU_SAFE_CALL(cublas_copy(GetCublasHandle(), dim_, M.Data(), M.Stride() + 1, - data_, 1)); + CUBLAS_SAFE_CALL(cublas_copy(GetCublasHandle(), dim_, M.Data(), M.Stride() + 1, + data_, 1)); CuDevice::Instantiate().AccuProfile(__func__, tim); } else -- 2.39.2