diff options
author | selaselah | 2017-09-22 11:43:16 -0500 |
---|---|---|
committer | Daniel Povey | 2017-09-22 11:43:16 -0500 |
commit | 633310c0d69478dc39027696751e874d4bc27862 (patch) | |
tree | ded6d7032e08d919570c34a27dbcbfa713cbadfb | |
parent | ecd48ca7f9b9af116d4cdb5bcd65116311ff518c (diff) | |
download | kaldi-633310c0d69478dc39027696751e874d4bc27862.tar.gz kaldi-633310c0d69478dc39027696751e874d4bc27862.tar.xz kaldi-633310c0d69478dc39027696751e874d4bc27862.zip |
[src] Fix CU_SAFE_CALL wrapper so it correctly prints CuBLAS error codes (#1900)
-rw-r--r-- | src/cudamatrix/cu-common.cc | 17 | ||||
-rw-r--r-- | src/cudamatrix/cu-common.h | 12 | ||||
-rw-r--r-- | src/cudamatrix/cu-device.cc | 2 | ||||
-rw-r--r-- | src/cudamatrix/cu-matrix.cc | 27 | ||||
-rw-r--r-- | src/cudamatrix/cu-packed-matrix.cc | 2 | ||||
-rw-r--r-- | src/cudamatrix/cu-sp-matrix.cc | 4 | ||||
-rw-r--r-- | src/cudamatrix/cu-tp-matrix.cc | 2 | ||||
-rw-r--r-- | src/cudamatrix/cu-vector.cc | 24 |
8 files changed, 58 insertions, 32 deletions
diff --git a/src/cudamatrix/cu-common.cc b/src/cudamatrix/cu-common.cc index dc88b1e16..89de8c62e 100644 --- a/src/cudamatrix/cu-common.cc +++ b/src/cudamatrix/cu-common.cc | |||
@@ -67,6 +67,23 @@ void GetBlockSizesForSimpleMatrixOperation(int32 num_rows, | |||
67 | "Matrix has too many rows to process"); | 67 | "Matrix has too many rows to process"); |
68 | dimGrid->z = 1; | 68 | dimGrid->z = 1; |
69 | } | 69 | } |
70 | |||
71 | const char* cublasGetStatusString(cublasStatus_t status) { | ||
72 | switch(status) { | ||
73 | case CUBLAS_STATUS_SUCCESS: return "CUBLAS_STATUS_SUCCESS"; | ||
74 | case CUBLAS_STATUS_NOT_INITIALIZED: return "CUBLAS_STATUS_NOT_INITIALIZED"; | ||
75 | case CUBLAS_STATUS_ALLOC_FAILED: return "CUBLAS_STATUS_ALLOC_FAILED"; | ||
76 | case CUBLAS_STATUS_INVALID_VALUE: return "CUBLAS_STATUS_INVALID_VALUE"; | ||
77 | case CUBLAS_STATUS_ARCH_MISMATCH: return "CUBLAS_STATUS_ARCH_MISMATCH"; | ||
78 | case CUBLAS_STATUS_MAPPING_ERROR: return "CUBLAS_STATUS_MAPPING_ERROR"; | ||
79 | case CUBLAS_STATUS_EXECUTION_FAILED: return "CUBLAS_STATUS_EXECUTION_FAILED"; | ||
80 | case CUBLAS_STATUS_INTERNAL_ERROR: return "CUBLAS_STATUS_INTERNAL_ERROR"; | ||
81 | case CUBLAS_STATUS_NOT_SUPPORTED: return "CUBLAS_STATUS_NOT_SUPPORTED"; | ||
82 | case CUBLAS_STATUS_LICENSE_ERROR: return "CUBLAS_STATUS_LICENSE_ERROR"; | ||
83 | } | ||
84 | return "CUBLAS_STATUS_UNKNOWN_ERROR"; | ||
85 | } | ||
86 | |||
70 | #endif | 87 | #endif |
71 | 88 | ||
72 | } // namespace | 89 | } // namespace |
diff --git a/src/cudamatrix/cu-common.h b/src/cudamatrix/cu-common.h index f151a01a9..10e70bc7d 100644 --- a/src/cudamatrix/cu-common.h +++ b/src/cudamatrix/cu-common.h | |||
@@ -33,8 +33,6 @@ | |||
33 | #include <cublas_v2.h> | 33 | #include <cublas_v2.h> |
34 | #include <cuda_runtime_api.h> | 34 | #include <cuda_runtime_api.h> |
35 | 35 | ||
36 | |||
37 | |||
38 | #define CU_SAFE_CALL(fun) \ | 36 | #define CU_SAFE_CALL(fun) \ |
39 | { \ | 37 | { \ |
40 | int32 ret; \ | 38 | int32 ret; \ |
@@ -43,6 +41,14 @@ | |||
43 | } \ | 41 | } \ |
44 | } | 42 | } |
45 | 43 | ||
44 | #define CUBLAS_SAFE_CALL(fun) \ | ||
45 | { \ | ||
46 | int32 ret; \ | ||
47 | if ((ret = (fun)) != 0) { \ | ||
48 | KALDI_ERR << "cublasStatus_t " << ret << " : \"" << cublasGetStatusString((cublasStatus_t)ret) << "\" returned from '" << #fun << "'"; \ | ||
49 | } \ | ||
50 | } | ||
51 | |||
46 | #define KALDI_CUDA_ERR(ret, msg) \ | 52 | #define KALDI_CUDA_ERR(ret, msg) \ |
47 | { \ | 53 | { \ |
48 | if (ret != 0) { \ | 54 | if (ret != 0) { \ |
@@ -75,6 +81,8 @@ void GetBlockSizesForSimpleMatrixOperation(int32 num_rows, | |||
75 | dim3 *dimGrid, | 81 | dim3 *dimGrid, |
76 | dim3 *dimBlock); | 82 | dim3 *dimBlock); |
77 | 83 | ||
84 | /** This is analogous to the CUDA function cudaGetErrorString(). **/ | ||
85 | const char* cublasGetStatusString(cublasStatus_t status); | ||
78 | 86 | ||
79 | } | 87 | } |
80 | 88 | ||
diff --git a/src/cudamatrix/cu-device.cc b/src/cudamatrix/cu-device.cc index e54b57530..92ef0c02d 100644 --- a/src/cudamatrix/cu-device.cc +++ b/src/cudamatrix/cu-device.cc | |||
@@ -214,7 +214,7 @@ void CuDevice::FinalizeActiveGpu() { | |||
214 | // Remember the id of active GPU | 214 | // Remember the id of active GPU |
215 | active_gpu_id_ = act_gpu_id; // CuDevice::Enabled() is true from now on | 215 | active_gpu_id_ = act_gpu_id; // CuDevice::Enabled() is true from now on |
216 | // Initialize the CUBLAS | 216 | // Initialize the CUBLAS |
217 | CU_SAFE_CALL(cublasCreate(&handle_)); | 217 | CUBLAS_SAFE_CALL(cublasCreate(&handle_)); |
218 | 218 | ||
219 | // Notify user which GPU is finally used | 219 | // Notify user which GPU is finally used |
220 | char name[128]; | 220 | char name[128]; |
diff --git a/src/cudamatrix/cu-matrix.cc b/src/cudamatrix/cu-matrix.cc index 30a79b5c9..5860c1938 100644 --- a/src/cudamatrix/cu-matrix.cc +++ b/src/cudamatrix/cu-matrix.cc | |||
@@ -1161,7 +1161,7 @@ void CuMatrixBase<Real>::AddMatMat( | |||
1161 | #if HAVE_CUDA == 1 | 1161 | #if HAVE_CUDA == 1 |
1162 | if (CuDevice::Instantiate().Enabled()) { | 1162 | if (CuDevice::Instantiate().Enabled()) { |
1163 | CuTimer tim; | 1163 | CuTimer tim; |
1164 | CU_SAFE_CALL(cublas_gemm(GetCublasHandle(), | 1164 | CUBLAS_SAFE_CALL(cublas_gemm(GetCublasHandle(), |
1165 | (transB==kTrans? CUBLAS_OP_T:CUBLAS_OP_N), | 1165 | (transB==kTrans? CUBLAS_OP_T:CUBLAS_OP_N), |
1166 | (transA==kTrans? CUBLAS_OP_T:CUBLAS_OP_N), | 1166 | (transA==kTrans? CUBLAS_OP_T:CUBLAS_OP_N), |
1167 | m, n, k, alpha, B.data_, B.Stride(), | 1167 | m, n, k, alpha, B.data_, B.Stride(), |
@@ -1188,8 +1188,8 @@ void CuMatrixBase<Real>::AddVecVec( | |||
1188 | #if HAVE_CUDA == 1 | 1188 | #if HAVE_CUDA == 1 |
1189 | if (CuDevice::Instantiate().Enabled()) { | 1189 | if (CuDevice::Instantiate().Enabled()) { |
1190 | CuTimer tim; | 1190 | CuTimer tim; |
1191 | CU_SAFE_CALL(cublas_ger(GetCublasHandle(), m, n, alpha, | 1191 | CUBLAS_SAFE_CALL(cublas_ger(GetCublasHandle(), m, n, alpha, |
1192 | y.Data(), 1, x.Data(), 1, data_, Stride())); | 1192 | y.Data(), 1, x.Data(), 1, data_, Stride())); |
1193 | 1193 | ||
1194 | CuDevice::Instantiate().AccuProfile(__func__, tim); | 1194 | CuDevice::Instantiate().AccuProfile(__func__, tim); |
1195 | } else | 1195 | } else |
@@ -1215,9 +1215,10 @@ void CuMatrixBase<Real>::SymAddMat2( | |||
1215 | CuTimer tim; | 1215 | CuTimer tim; |
1216 | cublasOperation_t trans = (transA == kTrans ? CUBLAS_OP_N : CUBLAS_OP_T); | 1216 | cublasOperation_t trans = (transA == kTrans ? CUBLAS_OP_N : CUBLAS_OP_T); |
1217 | MatrixIndexT A_other_dim = (transA == kNoTrans ? A.num_cols_ : A.num_rows_); | 1217 | MatrixIndexT A_other_dim = (transA == kNoTrans ? A.num_cols_ : A.num_rows_); |
1218 | CU_SAFE_CALL(cublas_syrk(GetCublasHandle(), CUBLAS_FILL_MODE_UPPER, trans, | 1218 | CUBLAS_SAFE_CALL(cublas_syrk(GetCublasHandle(), CUBLAS_FILL_MODE_UPPER, |
1219 | num_rows_, A_other_dim, alpha, A.Data(), A.Stride(), | 1219 | trans, num_rows_, A_other_dim, |
1220 | beta, this->data_, this->stride_)); | 1220 | alpha, A.Data(), A.Stride(), |
1221 | beta, this->data_, this->stride_)); | ||
1221 | 1222 | ||
1222 | CuDevice::Instantiate().AccuProfile(__func__, tim); | 1223 | CuDevice::Instantiate().AccuProfile(__func__, tim); |
1223 | } else | 1224 | } else |
@@ -2106,13 +2107,13 @@ void AddMatMatBatched(const Real alpha, std::vector<CuSubMatrix<Real>* > &C, | |||
2106 | 2107 | ||
2107 | CU_SAFE_CALL(cudaMemcpy(device_abc_array, host_abc_array, 3*size*sizeof(Real*), cudaMemcpyHostToDevice)); | 2108 | CU_SAFE_CALL(cudaMemcpy(device_abc_array, host_abc_array, 3*size*sizeof(Real*), cudaMemcpyHostToDevice)); |
2108 | 2109 | ||
2109 | CU_SAFE_CALL(cublas_gemmBatched(GetCublasHandle(), | 2110 | CUBLAS_SAFE_CALL(cublas_gemmBatched(GetCublasHandle(), |
2110 | (transB==kTrans? CUBLAS_OP_T:CUBLAS_OP_N), | 2111 | (transB==kTrans? CUBLAS_OP_T:CUBLAS_OP_N), |
2111 | (transA==kTrans? CUBLAS_OP_T:CUBLAS_OP_N), | 2112 | (transA==kTrans? CUBLAS_OP_T:CUBLAS_OP_N), |
2112 | m, n, k, alpha, device_b_array, | 2113 | m, n, k, alpha, device_b_array, |
2113 | B[0]->Stride(), device_a_array, | 2114 | B[0]->Stride(), device_a_array, |
2114 | A[0]->Stride(), beta, device_c_array, | 2115 | A[0]->Stride(), beta, device_c_array, |
2115 | C[0]->Stride(), size)); | 2116 | C[0]->Stride(), size)); |
2116 | 2117 | ||
2117 | CuDevice::Instantiate().Free(device_abc_array); | 2118 | CuDevice::Instantiate().Free(device_abc_array); |
2118 | delete[] host_abc_array; | 2119 | delete[] host_abc_array; |
diff --git a/src/cudamatrix/cu-packed-matrix.cc b/src/cudamatrix/cu-packed-matrix.cc index 66d7e4102..eeccae94f 100644 --- a/src/cudamatrix/cu-packed-matrix.cc +++ b/src/cudamatrix/cu-packed-matrix.cc | |||
@@ -298,7 +298,7 @@ void CuPackedMatrix<Real>::Scale(Real alpha) { | |||
298 | CuTimer tim; | 298 | CuTimer tim; |
299 | size_t nr = static_cast<size_t>(num_rows_), | 299 | size_t nr = static_cast<size_t>(num_rows_), |
300 | num_elements = ((nr * (nr+1)) / 2); | 300 | num_elements = ((nr * (nr+1)) / 2); |
301 | CU_SAFE_CALL(cublas_scal(GetCublasHandle(), num_elements, alpha, data_, 1)); | 301 | CUBLAS_SAFE_CALL(cublas_scal(GetCublasHandle(), num_elements, alpha, data_, 1)); |
302 | 302 | ||
303 | CuDevice::Instantiate().AccuProfile("CuPackedMatrix::Scale", tim); | 303 | CuDevice::Instantiate().AccuProfile("CuPackedMatrix::Scale", tim); |
304 | } else | 304 | } else |
diff --git a/src/cudamatrix/cu-sp-matrix.cc b/src/cudamatrix/cu-sp-matrix.cc index 4d6905c44..1360c7b4f 100644 --- a/src/cudamatrix/cu-sp-matrix.cc +++ b/src/cudamatrix/cu-sp-matrix.cc | |||
@@ -115,8 +115,8 @@ void CuSpMatrix<Real>::AddVec2(const Real alpha, const CuVectorBase<Real> &v) { | |||
115 | dim3 dimBlock(CU2DBLOCK, CU2DBLOCK); | 115 | dim3 dimBlock(CU2DBLOCK, CU2DBLOCK); |
116 | dim3 dimGrid(n_blocks(nr, CU2DBLOCK), n_blocks(nr, CU2DBLOCK)); | 116 | dim3 dimGrid(n_blocks(nr, CU2DBLOCK), n_blocks(nr, CU2DBLOCK)); |
117 | 117 | ||
118 | CU_SAFE_CALL(cublas_spr(GetCublasHandle(), CUBLAS_FILL_MODE_UPPER, this->num_rows_, alpha, v.Data(), | 118 | CUBLAS_SAFE_CALL(cublas_spr(GetCublasHandle(), CUBLAS_FILL_MODE_UPPER, this->num_rows_, alpha, v.Data(), |
119 | 1, this->Data())); | 119 | 1, this->Data())); |
120 | 120 | ||
121 | CuDevice::Instantiate().AccuProfile("CuSpMatrix::AddVec2", tim); | 121 | CuDevice::Instantiate().AccuProfile("CuSpMatrix::AddVec2", tim); |
122 | } else | 122 | } else |
diff --git a/src/cudamatrix/cu-tp-matrix.cc b/src/cudamatrix/cu-tp-matrix.cc index cc575e4a0..377c34239 100644 --- a/src/cudamatrix/cu-tp-matrix.cc +++ b/src/cudamatrix/cu-tp-matrix.cc | |||
@@ -74,7 +74,7 @@ void CuTpMatrix<Real>::Invert() { | |||
74 | CU_SAFE_CALL(cudaGetLastError()); | 74 | CU_SAFE_CALL(cudaGetLastError()); |
75 | CuMatrix<Real> tmp2(dim, dim); | 75 | CuMatrix<Real> tmp2(dim, dim); |
76 | tmp2.CopyFromTp(*this); | 76 | tmp2.CopyFromTp(*this); |
77 | CU_SAFE_CALL(cublas_trsm(GetCublasHandle(), dim, dim, alpha, tmp2.Data(), tmp2.Dim().stride, | 77 | CUBLAS_SAFE_CALL(cublas_trsm(GetCublasHandle(), dim, dim, alpha, tmp2.Data(), tmp2.Dim().stride, |
78 | tmp.Data(), tmp.Dim().stride)); | 78 | tmp.Data(), tmp.Dim().stride)); |
79 | this->CopyFromMat(tmp, kNoTrans); | 79 | this->CopyFromMat(tmp, kNoTrans); |
80 | CuDevice::Instantiate().AccuProfile(__func__, tim); | 80 | CuDevice::Instantiate().AccuProfile(__func__, tim); |
diff --git a/src/cudamatrix/cu-vector.cc b/src/cudamatrix/cu-vector.cc index 595fd0aaa..143bd5115 100644 --- a/src/cudamatrix/cu-vector.cc +++ b/src/cudamatrix/cu-vector.cc | |||
@@ -49,8 +49,8 @@ Real VecVec(const CuVectorBase<Real> &a, | |||
49 | #if HAVE_CUDA == 1 | 49 | #if HAVE_CUDA == 1 |
50 | if (CuDevice::Instantiate().Enabled()) { | 50 | if (CuDevice::Instantiate().Enabled()) { |
51 | CuTimer tim; | 51 | CuTimer tim; |
52 | CU_SAFE_CALL(cublas_dot(GetCublasHandle(), a.Dim(), a.Data(), 1, b.Data(), | 52 | CUBLAS_SAFE_CALL(cublas_dot(GetCublasHandle(), a.Dim(), a.Data(), 1, b.Data(), |
53 | 1, &result)); | 53 | 1, &result)); |
54 | CuDevice::Instantiate().AccuProfile(__func__, tim); | 54 | CuDevice::Instantiate().AccuProfile(__func__, tim); |
55 | } else | 55 | } else |
56 | #endif | 56 | #endif |
@@ -451,10 +451,10 @@ void CuVectorBase<Real>::AddMatVec(const Real alpha, | |||
451 | 451 | ||
452 | // Everything is backwards in CuBlas. We need to reverse rows, columns, | 452 | // Everything is backwards in CuBlas. We need to reverse rows, columns, |
453 | // transpose-ness. | 453 | // transpose-ness. |
454 | CU_SAFE_CALL(cublas_gemv(GetCublasHandle(), | 454 | CUBLAS_SAFE_CALL(cublas_gemv(GetCublasHandle(), |
455 | (trans==kTrans? CUBLAS_OP_N:CUBLAS_OP_T), | 455 | (trans==kTrans? CUBLAS_OP_N:CUBLAS_OP_T), |
456 | M.NumCols(), M.NumRows(), alpha, M.Data(), | 456 | M.NumCols(), M.NumRows(), alpha, M.Data(), |
457 | M.Stride(), v.Data(), 1, beta, data_, 1)); | 457 | M.Stride(), v.Data(), 1, beta, data_, 1)); |
458 | 458 | ||
459 | CuDevice::Instantiate().AccuProfile(__func__, tim); | 459 | CuDevice::Instantiate().AccuProfile(__func__, tim); |
460 | } else | 460 | } else |
@@ -478,8 +478,8 @@ void CuVectorBase<Real>::AddSpVec(const Real alpha, | |||
478 | 478 | ||
479 | // Note: in our opinion the CuSpMatrix represents a lower-triangular matrix, but | 479 | // Note: in our opinion the CuSpMatrix represents a lower-triangular matrix, but |
480 | // in CUBLAS, for some stupid reason, everything is reversed. | 480 | // in CUBLAS, for some stupid reason, everything is reversed. |
481 | CU_SAFE_CALL(cublas_spmv(GetCublasHandle(), CUBLAS_FILL_MODE_UPPER, Dim(), | 481 | CUBLAS_SAFE_CALL(cublas_spmv(GetCublasHandle(), CUBLAS_FILL_MODE_UPPER, Dim(), |
482 | alpha, M.Data(), v.Data(), 1, beta, data_, 1)); | 482 | alpha, M.Data(), v.Data(), 1, beta, data_, 1)); |
483 | 483 | ||
484 | CuDevice::Instantiate().AccuProfile(__func__, tim); | 484 | CuDevice::Instantiate().AccuProfile(__func__, tim); |
485 | } else | 485 | } else |
@@ -775,7 +775,7 @@ void CuVectorBase<double>::CopyFromVec(const CuVectorBase<float> &src) { | |||
775 | if (CuDevice::Instantiate().Enabled()) { | 775 | if (CuDevice::Instantiate().Enabled()) { |
776 | if (dim_ == 0) return; | 776 | if (dim_ == 0) return; |
777 | CuTimer tim; | 777 | CuTimer tim; |
778 | CU_SAFE_CALL(cublas_copy(GetCublasHandle(), dim_, src.Data(), 1, data_, 1)); | 778 | CUBLAS_SAFE_CALL(cublas_copy(GetCublasHandle(), dim_, src.Data(), 1, data_, 1)); |
779 | CuDevice::Instantiate().AccuProfile(__func__, tim); | 779 | CuDevice::Instantiate().AccuProfile(__func__, tim); |
780 | } else | 780 | } else |
781 | #endif | 781 | #endif |
@@ -792,7 +792,7 @@ void CuVectorBase<float>::CopyFromVec(const CuVectorBase<double> &src) { | |||
792 | if (CuDevice::Instantiate().Enabled()) { | 792 | if (CuDevice::Instantiate().Enabled()) { |
793 | if (dim_ == 0) return; | 793 | if (dim_ == 0) return; |
794 | CuTimer tim; | 794 | CuTimer tim; |
795 | CU_SAFE_CALL(cublas_copy(GetCublasHandle(), dim_, src.Data(), 1, data_, 1)); | 795 | CUBLAS_SAFE_CALL(cublas_copy(GetCublasHandle(), dim_, src.Data(), 1, data_, 1)); |
796 | CuDevice::Instantiate().AccuProfile(__func__, tim); | 796 | CuDevice::Instantiate().AccuProfile(__func__, tim); |
797 | } else | 797 | } else |
798 | #endif | 798 | #endif |
@@ -1089,8 +1089,8 @@ void CuVectorBase<Real>::CopyDiagFromMat(const CuMatrix<Real> &M) { | |||
1089 | if (CuDevice::Instantiate().Enabled()) { | 1089 | if (CuDevice::Instantiate().Enabled()) { |
1090 | KALDI_ASSERT(dim_ == std::min(M.NumRows(), M.NumCols())); | 1090 | KALDI_ASSERT(dim_ == std::min(M.NumRows(), M.NumCols())); |
1091 | CuTimer tim; | 1091 | CuTimer tim; |
1092 | CU_SAFE_CALL(cublas_copy(GetCublasHandle(), dim_, M.Data(), M.Stride() + 1, | 1092 | CUBLAS_SAFE_CALL(cublas_copy(GetCublasHandle(), dim_, M.Data(), M.Stride() + 1, |
1093 | data_, 1)); | 1093 | data_, 1)); |
1094 | 1094 | ||
1095 | CuDevice::Instantiate().AccuProfile(__func__, tim); | 1095 | CuDevice::Instantiate().AccuProfile(__func__, tim); |
1096 | } else | 1096 | } else |