about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Daniel Povey 2017-10-09 18:13:56 -0500
committer GitHub 2017-10-09 18:13:56 -0500
commit 9eccdbebc5204cf64c9995839a5b1b716e0b3973 (patch)
tree 4c8f76272a4651b0be04b457df8cb0c52ddb157c
parent 0f2df80141268206214c550ab1edc9888a09c6ac (diff)
download kaldi-9eccdbebc5204cf64c9995839a5b1b716e0b3973.tar.gz
kaldi-9eccdbebc5204cf64c9995839a5b1b716e0b3973.tar.xz
kaldi-9eccdbebc5204cf64c9995839a5b1b716e0b3973.zip
[src] Some fixes to cu-rand to ensure deterministic behavior. (#1927)
-rw-r--r-- src/cudamatrix/cu-rand.cc 29
-rw-r--r-- src/cudamatrix/cu-rand.h 10
2 files changed, 31 insertions, 8 deletions
diff --git a/src/cudamatrix/cu-rand.cc b/src/cudamatrix/cu-rand.cc
index fec5c1225..556fe12c3 100644
--- a/src/cudamatrix/cu-rand.cc
+++ b/src/cudamatrix/cu-rand.cc
@@ -63,7 +63,11 @@ void CuRand<Real>::RandUniform(CuMatrixBase<Real> *tgt) {
63 CuTimer tim; 63 CuTimer tim;
64 // Better use 'tmp' matrix, 'tgt' can be a window into a larger matrix, 64 // Better use 'tmp' matrix, 'tgt' can be a window into a larger matrix,
65 // so we should not use it to generate random numbers over whole stride. 65 // so we should not use it to generate random numbers over whole stride.
66 CuMatrix<Real> tmp(tgt->NumRows(), tgt->NumCols(), kUndefined); 66 // Use the option kStrideEqualNumCols to ensure consistency
67 // (because when memory is nearly exhausted, the stride of CudaMallocPitch
68 // may vary).
69 CuMatrix<Real> tmp(tgt->NumRows(), tgt->NumCols(), kUndefined,
70 kStrideEqualNumCols);
67 CU_SAFE_CALL(curandGenerateUniformWrap(gen_, tmp.Data(), tmp.NumRows() * tmp.Stride())); 71 CU_SAFE_CALL(curandGenerateUniformWrap(gen_, tmp.Data(), tmp.NumRows() * tmp.Stride()));
68 tgt->CopyFromMat(tmp); 72 tgt->CopyFromMat(tmp);
69 CuDevice::Instantiate().AccuProfile(__func__, tim); 73 CuDevice::Instantiate().AccuProfile(__func__, tim);
@@ -113,8 +117,12 @@ void CuRand<Real>::RandGaussian(CuMatrixBase<Real> *tgt) {
113 // Also, we ensure to have 'even' number of elements for calling 'curand' 117 // Also, we ensure to have 'even' number of elements for calling 'curand'
114 // by possibly adding one column. Even number of elements is required by 118 // by possibly adding one column. Even number of elements is required by
115 // curandGenerateUniform(), curandGenerateUniformDouble(). 119 // curandGenerateUniform(), curandGenerateUniformDouble().
120 // Use the option kStrideEqualNumCols to ensure consistency
121 // (because when memory is nearly exhausted, the stride of CudaMallocPitch
122 // may vary).
116 MatrixIndexT num_cols_even = tgt->NumCols() + (tgt->NumCols() % 2); // + 0 or 1, 123 MatrixIndexT num_cols_even = tgt->NumCols() + (tgt->NumCols() % 2); // + 0 or 1,
117 CuMatrix<Real> tmp(tgt->NumRows(), num_cols_even, kUndefined); 124 CuMatrix<Real> tmp(tgt->NumRows(), num_cols_even, kUndefined,
125 kStrideEqualNumCols);
118 CU_SAFE_CALL(curandGenerateNormalWrap(gen_, tmp.Data(), tmp.NumRows()*tmp.Stride())); 126 CU_SAFE_CALL(curandGenerateNormalWrap(gen_, tmp.Data(), tmp.NumRows()*tmp.Stride()));
119 tgt->CopyFromMat(tmp.ColRange(0,tgt->NumCols())); 127 tgt->CopyFromMat(tmp.ColRange(0,tgt->NumCols()));
120 CuDevice::Instantiate().AccuProfile(__func__, tim); 128 CuDevice::Instantiate().AccuProfile(__func__, tim);
@@ -135,10 +143,15 @@ void CuRand<Real>::RandGaussian(CuMatrix<Real> *tgt) {
135 if (0 == (num_elements % 2)) { 143 if (0 == (num_elements % 2)) {
136 CU_SAFE_CALL(curandGenerateNormalWrap(gen_, tgt->Data(), num_elements)); 144 CU_SAFE_CALL(curandGenerateNormalWrap(gen_, tgt->Data(), num_elements));
137 } else { 145 } else {
138 // We use 'tmp' matrix with one column added, this guarantees 'even' number of elements. 146 // We use 'tmp' matrix with one column added, this guarantees an even
147 // number of elements. Use the option kStrideEqualNumCols to ensure
148 // consistency (because when memory is nearly exhausted, the stride of
149 // CudaMallocPitch may vary).
139 MatrixIndexT num_cols_even = tgt->NumCols() + (tgt->NumCols() % 2); // + 0 or 1, 150 MatrixIndexT num_cols_even = tgt->NumCols() + (tgt->NumCols() % 2); // + 0 or 1,
140 CuMatrix<Real> tmp(tgt->NumRows(), num_cols_even, kUndefined); 151 CuMatrix<Real> tmp(tgt->NumRows(), num_cols_even, kUndefined,
141 CU_SAFE_CALL(curandGenerateNormalWrap(gen_, tmp.Data(), tmp.NumRows()*tmp.Stride())); 152 kStrideEqualNumCols);
153 CU_SAFE_CALL(curandGenerateNormalWrap(gen_, tmp.Data(),
154 tmp.NumRows() * tmp.Stride()));
142 tgt->CopyFromMat(tmp.ColRange(0,tgt->NumCols())); 155 tgt->CopyFromMat(tmp.ColRange(0,tgt->NumCols()));
143 } 156 }
144 CuDevice::Instantiate().AccuProfile(__func__, tim); 157 CuDevice::Instantiate().AccuProfile(__func__, tim);
@@ -187,7 +200,10 @@ void CuRand<Real>::BinarizeProbs(const CuMatrix<Real> &probs, CuMatrix<Real> *st
187/// add gaussian noise to each element 200/// add gaussian noise to each element
188template<typename Real> 201template<typename Real>
189void CuRand<Real>::AddGaussNoise(CuMatrix<Real> *tgt, Real gscale) { 202void CuRand<Real>::AddGaussNoise(CuMatrix<Real> *tgt, Real gscale) {
190 CuMatrix<Real> tmp(tgt->NumRows(), tgt->NumCols()); 203 // Use the option kStrideEqualNumCols to ensure consistency (because when
204 // memory is nearly exhausted, the stride of CudaMallocPitch may vary).
205 CuMatrix<Real> tmp(tgt->NumRows(), tgt->NumCols(),
206 kUndefined, kStrideEqualNumCols);
191 this->RandGaussian(&tmp); 207 this->RandGaussian(&tmp);
192 tgt->AddMat(gscale, tmp); 208 tgt->AddMat(gscale, tmp);
193} 209}
@@ -197,4 +213,3 @@ template class CuRand<float>;
197template class CuRand<double>; 213template class CuRand<double>;
198 214
199} // namespace, 215} // namespace,
200
diff --git a/src/cudamatrix/cu-rand.h b/src/cudamatrix/cu-rand.h
index 2c8204b6b..d999d6707 100644
--- a/src/cudamatrix/cu-rand.h
+++ b/src/cudamatrix/cu-rand.h
@@ -66,6 +66,15 @@ class CuRand {
66 #endif 66 #endif
67 } 67 }
68 68
69 // CAUTION.
70 // For the versions of these functions that output to a CuMatrix (as opposed to
71 // CuMatrixBase), the random numbers depend on the stride, and the stride
72 // is not guaranteed to be consistent for the same dimension of matrix
73 // (it usually will be, but not when memory is nearly exhausted). So
74 // for applications where consistency is essential, either use the versions
75 // of these functions that accept CuMatrixBase, or initialize your matrix
76 // with the kStrideEqualNumCols argument to ensure consistent stride.
77
69 /// Fill with uniform [0..1] floats, 78 /// Fill with uniform [0..1] floats,
70 void RandUniform(CuMatrixBase<Real> *tgt); 79 void RandUniform(CuMatrixBase<Real> *tgt);
71 void RandUniform(CuMatrix<Real> *tgt); 80 void RandUniform(CuMatrix<Real> *tgt);
@@ -89,4 +98,3 @@ class CuRand {
89} // namespace 98} // namespace
90 99
91#endif // KALDI_CUDAMATRIX_CU_RAND_H_ 100#endif // KALDI_CUDAMATRIX_CU_RAND_H_
92