summary | shortlog | log | commit | commitdiff | tree
raw | patch | inline | side by side (parent: 06dc0d2)
raw | patch | inline | side by side (parent: 06dc0d2)
author | Yangqing Jia <jiayq84@gmail.com> | |
Wed, 18 Sep 2013 21:45:52 +0000 (14:45 -0700) | ||
committer | Yangqing Jia <jiayq84@gmail.com> | |
Wed, 18 Sep 2013 21:45:52 +0000 (14:45 -0700) |
src/caffeine/test/test_util_blas.cpp | [moved from src/caffeine/test/test_util_gemm.cpp with 68% similarity] | patch | blob | history |
src/caffeine/util/blas.cpp | [moved from src/caffeine/util/gemm.cpp with 63% similarity] | patch | blob | history |
src/caffeine/util/blas.hpp | [moved from src/caffeine/util/gemm.hpp with 71% similarity] | patch | blob | history |
similarity index 68%
rename from src/caffeine/test/test_util_gemm.cpp
rename to src/caffeine/test/test_util_blas.cpp
index 9ea7160b3f1672c30f5656e46b4261dc5df8254e..000311d9af63491d5900e54126b88e523962e538 100644 (file)
rename from src/caffeine/test/test_util_gemm.cpp
rename to src/caffeine/test/test_util_blas.cpp
index 9ea7160b3f1672c30f5656e46b4261dc5df8254e..000311d9af63491d5900e54126b88e523962e538 100644 (file)
#include "gtest/gtest.h"
#include "caffeine/blob.hpp"
-#include "caffeine/util/gemm.hpp"
+#include "caffeine/util/blas.hpp"
namespace caffeine {
}
+// Exercises decaf_cpu_gemv and decaf_gpu_gemv on the 2x3 row-major matrix
+// A = [1 2 3; 4 5 6]:
+//   untransposed: A   * [1 2 3]^T = [14 32]
+//   transposed:   A^T * [1 2]^T   = [9 12 15]
+// The integer-valued inputs keep every product exactly representable, so
+// the results are compared with EXPECT_EQ rather than a tolerance.
+TYPED_TEST(GemmTest, TestGemv) {
+  Blob<TypeParam> A(1,1,2,3);
+  Blob<TypeParam> x(1,1,1,3);
+  Blob<TypeParam> y(1,1,1,2);
+  TypeParam data[6] = {1, 2, 3, 4, 5, 6};
+  TypeParam result_2[2] = {14, 32};
+  TypeParam result_3[3] = {9, 12, 15};
+  memcpy(A.mutable_cpu_data(), data, 6 * sizeof(TypeParam));
+  memcpy(x.mutable_cpu_data(), data, 3 * sizeof(TypeParam));
+
+  // 4-byte element types always run; wider ones (presumably double) only
+  // when the CUDA device reports a major compute capability of at least 2.
+  if (sizeof(TypeParam) == 4 || CAFFEINE_TEST_CUDA_PROP.major >= 2) {
+    decaf_cpu_gemv<TypeParam>(CblasNoTrans, 2, 3, 1., A.cpu_data(),
+        x.cpu_data(), 0., y.mutable_cpu_data());
+    for (int i = 0; i < 2; ++i) {
+      EXPECT_EQ(y.cpu_data()[i], result_2[i]);
+    }
+    // Clear y before the GPU run: it still holds the (correct) CPU result,
+    // which presumably gets copied to the device by mutable_gpu_data(), so
+    // without this the check below could pass even if the GPU call wrote
+    // nothing. beta is 0, so the cleared values never enter the result.
+    memset(y.mutable_cpu_data(), 0, 2 * sizeof(TypeParam));
+    decaf_gpu_gemv<TypeParam>(CblasNoTrans, 2, 3, 1., A.gpu_data(),
+        x.gpu_data(), 0., y.mutable_gpu_data());
+    for (int i = 0; i < 2; ++i) {
+      EXPECT_EQ(y.cpu_data()[i], result_2[i]);
+    }
+
+    // Test transpose case
+    memcpy(y.mutable_cpu_data(), data, 2 * sizeof(TypeParam));
+    decaf_cpu_gemv<TypeParam>(CblasTrans, 2, 3, 1., A.cpu_data(),
+        y.cpu_data(), 0., x.mutable_cpu_data());
+    for (int i = 0; i < 3; ++i) {
+      EXPECT_EQ(x.cpu_data()[i], result_3[i]);
+    }
+    // Same reasoning as above: wipe the CPU answer out of x so the GPU
+    // assertion only passes if the GPU call really produced it.
+    memset(x.mutable_cpu_data(), 0, 3 * sizeof(TypeParam));
+    decaf_gpu_gemv<TypeParam>(CblasTrans, 2, 3, 1., A.gpu_data(),
+        y.gpu_data(), 0., x.mutable_gpu_data());
+    for (int i = 0; i < 3; ++i) {
+      EXPECT_EQ(x.cpu_data()[i], result_3[i]);
+    }
+  } else {
+    LOG(ERROR) << "Skipping test due to old architecture.";
+  }
+}
+
}
similarity index 63%
rename from src/caffeine/util/gemm.cpp
rename to src/caffeine/util/blas.cpp
index 74a37660f81c7ce64af7b50187ba76de7f7a5c28..a1236322144460154c470eb7595c853a5e197637 100644 (file)
rename from src/caffeine/util/gemm.cpp
rename to src/caffeine/util/blas.cpp
index 74a37660f81c7ce64af7b50187ba76de7f7a5c28..a1236322144460154c470eb7595c853a5e197637 100644 (file)
#include <mkl.h>
#include <cublas_v2.h>
#include "caffeine/common.hpp"
-#include "caffeine/util/gemm.hpp"
+#include "caffeine/util/blas.hpp"
namespace caffeine {
N, M, K, &alpha, B, ldb, A, lda, &beta, C, N));
}
+// Single-precision CPU matrix-vector product, forwarded to cblas_sgemv.
+// A is handed over as a row-major M x N matrix, so its leading dimension is
+// N; x and y are both accessed with unit stride.
+template <>
+void decaf_cpu_gemv<float>(const CBLAS_TRANSPOSE TransA, const int M,
+    const int N, const float alpha, const float* A, const float* x,
+    const float beta, float* y) {
+  const int lda = N;
+  const int stride = 1;
+  cblas_sgemv(CblasRowMajor, TransA, M, N, alpha, A, lda, x, stride, beta, y,
+      stride);
+}
+
+// Double-precision CPU matrix-vector product, forwarded to cblas_dgemv.
+// A is handed over as a row-major M x N matrix, so its leading dimension is
+// N; x and y are both accessed with unit stride.
+template <>
+void decaf_cpu_gemv<double>(const CBLAS_TRANSPOSE TransA, const int M,
+    const int N, const double alpha, const double* A, const double* x,
+    const double beta, double* y) {
+  const int lda = N;
+  const int stride = 1;
+  cblas_dgemv(CblasRowMajor, TransA, M, N, alpha, A, lda, x, stride, beta, y,
+      stride);
+}
+
+// Single-precision GPU matrix-vector product, forwarded to cublasSgemv on
+// Caffeine::cublas_handle(); the status is checked by CUBLAS_CHECK.
+// The transpose flag is deliberately inverted and the dimensions are passed
+// as (N, M): cuBLAS interprets matrices as column-major, so a row-major
+// M x N buffer appears to it as the N x M transpose.
+template <>
+void decaf_gpu_gemv<float>(const CBLAS_TRANSPOSE TransA, const int M,
+    const int N, const float alpha, const float* A, const float* x,
+    const float beta, float* y) {
+  cublasOperation_t cuTransA = CUBLAS_OP_N;
+  if (TransA == CblasNoTrans) {
+    cuTransA = CUBLAS_OP_T;
+  }
+  const int lda = N;
+  CUBLAS_CHECK(cublasSgemv(Caffeine::cublas_handle(), cuTransA, N, M, &alpha,
+      A, lda, x, 1, &beta, y, 1));
+}
+
+// Double-precision GPU matrix-vector product, forwarded to cublasDgemv on
+// Caffeine::cublas_handle(); the status is checked by CUBLAS_CHECK.
+// The transpose flag is deliberately inverted and the dimensions are passed
+// as (N, M): cuBLAS interprets matrices as column-major, so a row-major
+// M x N buffer appears to it as the N x M transpose.
+template <>
+void decaf_gpu_gemv<double>(const CBLAS_TRANSPOSE TransA, const int M,
+    const int N, const double alpha, const double* A, const double* x,
+    const double beta, double* y) {
+  cublasOperation_t cuTransA = CUBLAS_OP_N;
+  if (TransA == CblasNoTrans) {
+    cuTransA = CUBLAS_OP_T;
+  }
+  const int lda = N;
+  CUBLAS_CHECK(cublasDgemv(Caffeine::cublas_handle(), cuTransA, N, M, &alpha,
+      A, lda, x, 1, &beta, y, 1));
+}
} // namespace caffeine
similarity index 71%
rename from src/caffeine/util/gemm.hpp
rename to src/caffeine/util/blas.hpp
index f6af9c3d49040fd67aa7f479ff8104165b0c8ff8..b1f4e3d60195ef287598e070c42ac9128bfd0a59 100644 (file)
rename from src/caffeine/util/gemm.hpp
rename to src/caffeine/util/blas.hpp
index f6af9c3d49040fd67aa7f479ff8104165b0c8ff8..b1f4e3d60195ef287598e070c42ac9128bfd0a59 100644 (file)
const Dtype alpha, const Dtype* A, const Dtype* B, const Dtype beta,
Dtype* C);
+// CPU matrix-vector product (BLAS gemv). Only declared here; blas.cpp
+// provides explicit specializations for float and double that forward to
+// cblas_sgemv / cblas_dgemv with A treated as a row-major matrix of M rows
+// and N columns and with x and y read/written at unit stride.
+// TransA selects whether A or its transpose is applied to x; alpha scales
+// the product and beta scales the existing contents of y.
+template <typename Dtype>
+void decaf_cpu_gemv(const CBLAS_TRANSPOSE TransA, const int M, const int N,
+ const Dtype alpha, const Dtype* A, const Dtype* x, const Dtype beta,
+ Dtype* y);
+
+// GPU matrix-vector product (BLAS gemv). Only declared here; blas.cpp
+// provides explicit specializations for float and double that forward to
+// cublasSgemv / cublasDgemv using Caffeine::cublas_handle().
+// Takes the same arguments as decaf_cpu_gemv; A, x and y are handed straight
+// to cuBLAS, so they are presumably expected to be device pointers (the
+// test passes Blob gpu_data() / mutable_gpu_data()).
+template <typename Dtype>
+void decaf_gpu_gemv(const CBLAS_TRANSPOSE TransA, const int M, const int N,
+ const Dtype alpha, const Dtype* A, const Dtype* x, const Dtype beta,
+ Dtype* y);
+
} // namespace caffeine
+
#endif // CAFFEINE_UTIL_GEMM_H_