aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorselaselah2017-09-22 11:43:16 -0500
committerDaniel Povey2017-09-22 11:43:16 -0500
commit633310c0d69478dc39027696751e874d4bc27862 (patch)
treeded6d7032e08d919570c34a27dbcbfa713cbadfb
parentecd48ca7f9b9af116d4cdb5bcd65116311ff518c (diff)
downloadkaldi-633310c0d69478dc39027696751e874d4bc27862.tar.gz
kaldi-633310c0d69478dc39027696751e874d4bc27862.tar.xz
kaldi-633310c0d69478dc39027696751e874d4bc27862.zip
[src] Fix CU_SAFE_CALL wrapper so it correctly prints CuBLAS error codes (#1900)
-rw-r--r--src/cudamatrix/cu-common.cc17
-rw-r--r--src/cudamatrix/cu-common.h12
-rw-r--r--src/cudamatrix/cu-device.cc2
-rw-r--r--src/cudamatrix/cu-matrix.cc27
-rw-r--r--src/cudamatrix/cu-packed-matrix.cc2
-rw-r--r--src/cudamatrix/cu-sp-matrix.cc4
-rw-r--r--src/cudamatrix/cu-tp-matrix.cc2
-rw-r--r--src/cudamatrix/cu-vector.cc24
8 files changed, 58 insertions, 32 deletions
diff --git a/src/cudamatrix/cu-common.cc b/src/cudamatrix/cu-common.cc
index dc88b1e16..89de8c62e 100644
--- a/src/cudamatrix/cu-common.cc
+++ b/src/cudamatrix/cu-common.cc
@@ -67,6 +67,23 @@ void GetBlockSizesForSimpleMatrixOperation(int32 num_rows,
67 "Matrix has too many rows to process"); 67 "Matrix has too many rows to process");
68 dimGrid->z = 1; 68 dimGrid->z = 1;
69} 69}
70
71const char* cublasGetStatusString(cublasStatus_t status) {
72 switch(status) {
73 case CUBLAS_STATUS_SUCCESS: return "CUBLAS_STATUS_SUCCESS";
74 case CUBLAS_STATUS_NOT_INITIALIZED: return "CUBLAS_STATUS_NOT_INITIALIZED";
75 case CUBLAS_STATUS_ALLOC_FAILED: return "CUBLAS_STATUS_ALLOC_FAILED";
76 case CUBLAS_STATUS_INVALID_VALUE: return "CUBLAS_STATUS_INVALID_VALUE";
77 case CUBLAS_STATUS_ARCH_MISMATCH: return "CUBLAS_STATUS_ARCH_MISMATCH";
78 case CUBLAS_STATUS_MAPPING_ERROR: return "CUBLAS_STATUS_MAPPING_ERROR";
79 case CUBLAS_STATUS_EXECUTION_FAILED: return "CUBLAS_STATUS_EXECUTION_FAILED";
80 case CUBLAS_STATUS_INTERNAL_ERROR: return "CUBLAS_STATUS_INTERNAL_ERROR";
81 case CUBLAS_STATUS_NOT_SUPPORTED: return "CUBLAS_STATUS_NOT_SUPPORTED";
82 case CUBLAS_STATUS_LICENSE_ERROR: return "CUBLAS_STATUS_LICENSE_ERROR";
83 }
84 return "CUBLAS_STATUS_UNKNOWN_ERROR";
85}
86
70#endif 87#endif
71 88
72} // namespace 89} // namespace
diff --git a/src/cudamatrix/cu-common.h b/src/cudamatrix/cu-common.h
index f151a01a9..10e70bc7d 100644
--- a/src/cudamatrix/cu-common.h
+++ b/src/cudamatrix/cu-common.h
@@ -33,8 +33,6 @@
33#include <cublas_v2.h> 33#include <cublas_v2.h>
34#include <cuda_runtime_api.h> 34#include <cuda_runtime_api.h>
35 35
36
37
38#define CU_SAFE_CALL(fun) \ 36#define CU_SAFE_CALL(fun) \
39{ \ 37{ \
40 int32 ret; \ 38 int32 ret; \
@@ -43,6 +41,14 @@
43 } \ 41 } \
44} 42}
45 43
44#define CUBLAS_SAFE_CALL(fun) \
45{ \
46 int32 ret; \
47 if ((ret = (fun)) != 0) { \
48 KALDI_ERR << "cublasStatus_t " << ret << " : \"" << cublasGetStatusString((cublasStatus_t)ret) << "\" returned from '" << #fun << "'"; \
49 } \
50}
51
46#define KALDI_CUDA_ERR(ret, msg) \ 52#define KALDI_CUDA_ERR(ret, msg) \
47{ \ 53{ \
48 if (ret != 0) { \ 54 if (ret != 0) { \
@@ -75,6 +81,8 @@ void GetBlockSizesForSimpleMatrixOperation(int32 num_rows,
75 dim3 *dimGrid, 81 dim3 *dimGrid,
76 dim3 *dimBlock); 82 dim3 *dimBlock);
77 83
84/** This is analogous to the CUDA function cudaGetErrorString(). **/
85const char* cublasGetStatusString(cublasStatus_t status);
78 86
79} 87}
80 88
diff --git a/src/cudamatrix/cu-device.cc b/src/cudamatrix/cu-device.cc
index e54b57530..92ef0c02d 100644
--- a/src/cudamatrix/cu-device.cc
+++ b/src/cudamatrix/cu-device.cc
@@ -214,7 +214,7 @@ void CuDevice::FinalizeActiveGpu() {
214 // Remember the id of active GPU 214 // Remember the id of active GPU
215 active_gpu_id_ = act_gpu_id; // CuDevice::Enabled() is true from now on 215 active_gpu_id_ = act_gpu_id; // CuDevice::Enabled() is true from now on
216 // Initialize the CUBLAS 216 // Initialize the CUBLAS
217 CU_SAFE_CALL(cublasCreate(&handle_)); 217 CUBLAS_SAFE_CALL(cublasCreate(&handle_));
218 218
219 // Notify user which GPU is finally used 219 // Notify user which GPU is finally used
220 char name[128]; 220 char name[128];
diff --git a/src/cudamatrix/cu-matrix.cc b/src/cudamatrix/cu-matrix.cc
index 30a79b5c9..5860c1938 100644
--- a/src/cudamatrix/cu-matrix.cc
+++ b/src/cudamatrix/cu-matrix.cc
@@ -1161,7 +1161,7 @@ void CuMatrixBase<Real>::AddMatMat(
1161#if HAVE_CUDA == 1 1161#if HAVE_CUDA == 1
1162 if (CuDevice::Instantiate().Enabled()) { 1162 if (CuDevice::Instantiate().Enabled()) {
1163 CuTimer tim; 1163 CuTimer tim;
1164 CU_SAFE_CALL(cublas_gemm(GetCublasHandle(), 1164 CUBLAS_SAFE_CALL(cublas_gemm(GetCublasHandle(),
1165 (transB==kTrans? CUBLAS_OP_T:CUBLAS_OP_N), 1165 (transB==kTrans? CUBLAS_OP_T:CUBLAS_OP_N),
1166 (transA==kTrans? CUBLAS_OP_T:CUBLAS_OP_N), 1166 (transA==kTrans? CUBLAS_OP_T:CUBLAS_OP_N),
1167 m, n, k, alpha, B.data_, B.Stride(), 1167 m, n, k, alpha, B.data_, B.Stride(),
@@ -1188,8 +1188,8 @@ void CuMatrixBase<Real>::AddVecVec(
1188#if HAVE_CUDA == 1 1188#if HAVE_CUDA == 1
1189 if (CuDevice::Instantiate().Enabled()) { 1189 if (CuDevice::Instantiate().Enabled()) {
1190 CuTimer tim; 1190 CuTimer tim;
1191 CU_SAFE_CALL(cublas_ger(GetCublasHandle(), m, n, alpha, 1191 CUBLAS_SAFE_CALL(cublas_ger(GetCublasHandle(), m, n, alpha,
1192 y.Data(), 1, x.Data(), 1, data_, Stride())); 1192 y.Data(), 1, x.Data(), 1, data_, Stride()));
1193 1193
1194 CuDevice::Instantiate().AccuProfile(__func__, tim); 1194 CuDevice::Instantiate().AccuProfile(__func__, tim);
1195 } else 1195 } else
@@ -1215,9 +1215,10 @@ void CuMatrixBase<Real>::SymAddMat2(
1215 CuTimer tim; 1215 CuTimer tim;
1216 cublasOperation_t trans = (transA == kTrans ? CUBLAS_OP_N : CUBLAS_OP_T); 1216 cublasOperation_t trans = (transA == kTrans ? CUBLAS_OP_N : CUBLAS_OP_T);
1217 MatrixIndexT A_other_dim = (transA == kNoTrans ? A.num_cols_ : A.num_rows_); 1217 MatrixIndexT A_other_dim = (transA == kNoTrans ? A.num_cols_ : A.num_rows_);
1218 CU_SAFE_CALL(cublas_syrk(GetCublasHandle(), CUBLAS_FILL_MODE_UPPER, trans, 1218 CUBLAS_SAFE_CALL(cublas_syrk(GetCublasHandle(), CUBLAS_FILL_MODE_UPPER,
1219 num_rows_, A_other_dim, alpha, A.Data(), A.Stride(), 1219 trans, num_rows_, A_other_dim,
1220 beta, this->data_, this->stride_)); 1220 alpha, A.Data(), A.Stride(),
1221 beta, this->data_, this->stride_));
1221 1222
1222 CuDevice::Instantiate().AccuProfile(__func__, tim); 1223 CuDevice::Instantiate().AccuProfile(__func__, tim);
1223 } else 1224 } else
@@ -2106,13 +2107,13 @@ void AddMatMatBatched(const Real alpha, std::vector<CuSubMatrix<Real>* > &C,
2106 2107
2107 CU_SAFE_CALL(cudaMemcpy(device_abc_array, host_abc_array, 3*size*sizeof(Real*), cudaMemcpyHostToDevice)); 2108 CU_SAFE_CALL(cudaMemcpy(device_abc_array, host_abc_array, 3*size*sizeof(Real*), cudaMemcpyHostToDevice));
2108 2109
2109 CU_SAFE_CALL(cublas_gemmBatched(GetCublasHandle(), 2110 CUBLAS_SAFE_CALL(cublas_gemmBatched(GetCublasHandle(),
2110 (transB==kTrans? CUBLAS_OP_T:CUBLAS_OP_N), 2111 (transB==kTrans? CUBLAS_OP_T:CUBLAS_OP_N),
2111 (transA==kTrans? CUBLAS_OP_T:CUBLAS_OP_N), 2112 (transA==kTrans? CUBLAS_OP_T:CUBLAS_OP_N),
2112 m, n, k, alpha, device_b_array, 2113 m, n, k, alpha, device_b_array,
2113 B[0]->Stride(), device_a_array, 2114 B[0]->Stride(), device_a_array,
2114 A[0]->Stride(), beta, device_c_array, 2115 A[0]->Stride(), beta, device_c_array,
2115 C[0]->Stride(), size)); 2116 C[0]->Stride(), size));
2116 2117
2117 CuDevice::Instantiate().Free(device_abc_array); 2118 CuDevice::Instantiate().Free(device_abc_array);
2118 delete[] host_abc_array; 2119 delete[] host_abc_array;
diff --git a/src/cudamatrix/cu-packed-matrix.cc b/src/cudamatrix/cu-packed-matrix.cc
index 66d7e4102..eeccae94f 100644
--- a/src/cudamatrix/cu-packed-matrix.cc
+++ b/src/cudamatrix/cu-packed-matrix.cc
@@ -298,7 +298,7 @@ void CuPackedMatrix<Real>::Scale(Real alpha) {
298 CuTimer tim; 298 CuTimer tim;
299 size_t nr = static_cast<size_t>(num_rows_), 299 size_t nr = static_cast<size_t>(num_rows_),
300 num_elements = ((nr * (nr+1)) / 2); 300 num_elements = ((nr * (nr+1)) / 2);
301 CU_SAFE_CALL(cublas_scal(GetCublasHandle(), num_elements, alpha, data_, 1)); 301 CUBLAS_SAFE_CALL(cublas_scal(GetCublasHandle(), num_elements, alpha, data_, 1));
302 302
303 CuDevice::Instantiate().AccuProfile("CuPackedMatrix::Scale", tim); 303 CuDevice::Instantiate().AccuProfile("CuPackedMatrix::Scale", tim);
304 } else 304 } else
diff --git a/src/cudamatrix/cu-sp-matrix.cc b/src/cudamatrix/cu-sp-matrix.cc
index 4d6905c44..1360c7b4f 100644
--- a/src/cudamatrix/cu-sp-matrix.cc
+++ b/src/cudamatrix/cu-sp-matrix.cc
@@ -115,8 +115,8 @@ void CuSpMatrix<Real>::AddVec2(const Real alpha, const CuVectorBase<Real> &v) {
115 dim3 dimBlock(CU2DBLOCK, CU2DBLOCK); 115 dim3 dimBlock(CU2DBLOCK, CU2DBLOCK);
116 dim3 dimGrid(n_blocks(nr, CU2DBLOCK), n_blocks(nr, CU2DBLOCK)); 116 dim3 dimGrid(n_blocks(nr, CU2DBLOCK), n_blocks(nr, CU2DBLOCK));
117 117
118 CU_SAFE_CALL(cublas_spr(GetCublasHandle(), CUBLAS_FILL_MODE_UPPER, this->num_rows_, alpha, v.Data(), 118 CUBLAS_SAFE_CALL(cublas_spr(GetCublasHandle(), CUBLAS_FILL_MODE_UPPER, this->num_rows_, alpha, v.Data(),
119 1, this->Data())); 119 1, this->Data()));
120 120
121 CuDevice::Instantiate().AccuProfile("CuSpMatrix::AddVec2", tim); 121 CuDevice::Instantiate().AccuProfile("CuSpMatrix::AddVec2", tim);
122 } else 122 } else
diff --git a/src/cudamatrix/cu-tp-matrix.cc b/src/cudamatrix/cu-tp-matrix.cc
index cc575e4a0..377c34239 100644
--- a/src/cudamatrix/cu-tp-matrix.cc
+++ b/src/cudamatrix/cu-tp-matrix.cc
@@ -74,7 +74,7 @@ void CuTpMatrix<Real>::Invert() {
74 CU_SAFE_CALL(cudaGetLastError()); 74 CU_SAFE_CALL(cudaGetLastError());
75 CuMatrix<Real> tmp2(dim, dim); 75 CuMatrix<Real> tmp2(dim, dim);
76 tmp2.CopyFromTp(*this); 76 tmp2.CopyFromTp(*this);
77 CU_SAFE_CALL(cublas_trsm(GetCublasHandle(), dim, dim, alpha, tmp2.Data(), tmp2.Dim().stride, 77 CUBLAS_SAFE_CALL(cublas_trsm(GetCublasHandle(), dim, dim, alpha, tmp2.Data(), tmp2.Dim().stride,
78 tmp.Data(), tmp.Dim().stride)); 78 tmp.Data(), tmp.Dim().stride));
79 this->CopyFromMat(tmp, kNoTrans); 79 this->CopyFromMat(tmp, kNoTrans);
80 CuDevice::Instantiate().AccuProfile(__func__, tim); 80 CuDevice::Instantiate().AccuProfile(__func__, tim);
diff --git a/src/cudamatrix/cu-vector.cc b/src/cudamatrix/cu-vector.cc
index 595fd0aaa..143bd5115 100644
--- a/src/cudamatrix/cu-vector.cc
+++ b/src/cudamatrix/cu-vector.cc
@@ -49,8 +49,8 @@ Real VecVec(const CuVectorBase<Real> &a,
49#if HAVE_CUDA == 1 49#if HAVE_CUDA == 1
50 if (CuDevice::Instantiate().Enabled()) { 50 if (CuDevice::Instantiate().Enabled()) {
51 CuTimer tim; 51 CuTimer tim;
52 CU_SAFE_CALL(cublas_dot(GetCublasHandle(), a.Dim(), a.Data(), 1, b.Data(), 52 CUBLAS_SAFE_CALL(cublas_dot(GetCublasHandle(), a.Dim(), a.Data(), 1, b.Data(),
53 1, &result)); 53 1, &result));
54 CuDevice::Instantiate().AccuProfile(__func__, tim); 54 CuDevice::Instantiate().AccuProfile(__func__, tim);
55} else 55} else
56#endif 56#endif
@@ -451,10 +451,10 @@ void CuVectorBase<Real>::AddMatVec(const Real alpha,
451 451
452 // Everything is backwards in CuBlas. We need to reverse rows, columns, 452 // Everything is backwards in CuBlas. We need to reverse rows, columns,
453 // transpose-ness. 453 // transpose-ness.
454 CU_SAFE_CALL(cublas_gemv(GetCublasHandle(), 454 CUBLAS_SAFE_CALL(cublas_gemv(GetCublasHandle(),
455 (trans==kTrans? CUBLAS_OP_N:CUBLAS_OP_T), 455 (trans==kTrans? CUBLAS_OP_N:CUBLAS_OP_T),
456 M.NumCols(), M.NumRows(), alpha, M.Data(), 456 M.NumCols(), M.NumRows(), alpha, M.Data(),
457 M.Stride(), v.Data(), 1, beta, data_, 1)); 457 M.Stride(), v.Data(), 1, beta, data_, 1));
458 458
459 CuDevice::Instantiate().AccuProfile(__func__, tim); 459 CuDevice::Instantiate().AccuProfile(__func__, tim);
460 } else 460 } else
@@ -478,8 +478,8 @@ void CuVectorBase<Real>::AddSpVec(const Real alpha,
478 478
479 // Note: in our opinion the CuSpMatrix represents a lower-triangular matrix, but 479 // Note: in our opinion the CuSpMatrix represents a lower-triangular matrix, but
480 // in CUBLAS, for some stupid reason, everything is reversed. 480 // in CUBLAS, for some stupid reason, everything is reversed.
481 CU_SAFE_CALL(cublas_spmv(GetCublasHandle(), CUBLAS_FILL_MODE_UPPER, Dim(), 481 CUBLAS_SAFE_CALL(cublas_spmv(GetCublasHandle(), CUBLAS_FILL_MODE_UPPER, Dim(),
482 alpha, M.Data(), v.Data(), 1, beta, data_, 1)); 482 alpha, M.Data(), v.Data(), 1, beta, data_, 1));
483 483
484 CuDevice::Instantiate().AccuProfile(__func__, tim); 484 CuDevice::Instantiate().AccuProfile(__func__, tim);
485 } else 485 } else
@@ -775,7 +775,7 @@ void CuVectorBase<double>::CopyFromVec(const CuVectorBase<float> &src) {
775 if (CuDevice::Instantiate().Enabled()) { 775 if (CuDevice::Instantiate().Enabled()) {
776 if (dim_ == 0) return; 776 if (dim_ == 0) return;
777 CuTimer tim; 777 CuTimer tim;
778 CU_SAFE_CALL(cublas_copy(GetCublasHandle(), dim_, src.Data(), 1, data_, 1)); 778 CUBLAS_SAFE_CALL(cublas_copy(GetCublasHandle(), dim_, src.Data(), 1, data_, 1));
779 CuDevice::Instantiate().AccuProfile(__func__, tim); 779 CuDevice::Instantiate().AccuProfile(__func__, tim);
780 } else 780 } else
781#endif 781#endif
@@ -792,7 +792,7 @@ void CuVectorBase<float>::CopyFromVec(const CuVectorBase<double> &src) {
792 if (CuDevice::Instantiate().Enabled()) { 792 if (CuDevice::Instantiate().Enabled()) {
793 if (dim_ == 0) return; 793 if (dim_ == 0) return;
794 CuTimer tim; 794 CuTimer tim;
795 CU_SAFE_CALL(cublas_copy(GetCublasHandle(), dim_, src.Data(), 1, data_, 1)); 795 CUBLAS_SAFE_CALL(cublas_copy(GetCublasHandle(), dim_, src.Data(), 1, data_, 1));
796 CuDevice::Instantiate().AccuProfile(__func__, tim); 796 CuDevice::Instantiate().AccuProfile(__func__, tim);
797 } else 797 } else
798#endif 798#endif
@@ -1089,8 +1089,8 @@ void CuVectorBase<Real>::CopyDiagFromMat(const CuMatrix<Real> &M) {
1089 if (CuDevice::Instantiate().Enabled()) { 1089 if (CuDevice::Instantiate().Enabled()) {
1090 KALDI_ASSERT(dim_ == std::min(M.NumRows(), M.NumCols())); 1090 KALDI_ASSERT(dim_ == std::min(M.NumRows(), M.NumCols()));
1091 CuTimer tim; 1091 CuTimer tim;
1092 CU_SAFE_CALL(cublas_copy(GetCublasHandle(), dim_, M.Data(), M.Stride() + 1, 1092 CUBLAS_SAFE_CALL(cublas_copy(GetCublasHandle(), dim_, M.Data(), M.Stride() + 1,
1093 data_, 1)); 1093 data_, 1));
1094 1094
1095 CuDevice::Instantiate().AccuProfile(__func__, tim); 1095 CuDevice::Instantiate().AccuProfile(__func__, tim);
1096 } else 1096 } else