1 #include "cuda_runtime.h"
3 #include "caffeine/common.hpp"
4 #include "caffeine/syncedmem.hpp"
6 namespace caffeine {
8 SyncedMemory::~SyncedMemory() {
9 if (cpu_ptr_) {
10 CUDA_CHECK(cudaFreeHost(cpu_ptr_));
11 }
13 if (gpu_ptr_) {
14 CUDA_CHECK(cudaFree(gpu_ptr_));
15 }
16 }
18 inline void SyncedMemory::to_cpu() {
19 switch(head_) {
20 case UNINITIALIZED:
21 CUDA_CHECK(cudaMallocHost(&cpu_ptr_, size_));
22 memset(cpu_ptr_, 0, size_);
23 head_ = HEAD_AT_CPU;
24 break;
25 case HEAD_AT_GPU:
26 if (cpu_ptr_ == NULL) {
27 CUDA_CHECK(cudaMallocHost(&cpu_ptr_, size_));
28 CUDA_CHECK(cudaMemcpy(cpu_ptr_, gpu_ptr_, size_, cudaMemcpyDeviceToHost));
29 }
30 head_ = SYNCED;
31 break;
32 case HEAD_AT_CPU:
33 case SYNCED:
34 break;
35 }
36 }
38 inline void SyncedMemory::to_gpu() {
39 switch(head_) {
40 case UNINITIALIZED:
41 CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_));
42 CUDA_CHECK(cudaMemset(gpu_ptr_, 0, size_));
43 head_ = HEAD_AT_GPU;
44 break;
45 case HEAD_AT_CPU:
46 if (gpu_ptr_ == NULL) {
47 CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_));
48 CUDA_CHECK(cudaMemcpy(gpu_ptr_, cpu_ptr_, size_, cudaMemcpyHostToDevice));
49 }
50 head_ = SYNCED;
51 break;
52 case HEAD_AT_GPU:
53 case SYNCED:
54 break;
55 }
56 }
59 inline const void* SyncedMemory::cpu_data() {
60 to_cpu();
61 return (const void*)cpu_ptr_;
62 }
64 inline const void* SyncedMemory::gpu_data() {
65 to_gpu();
66 return (const void*)gpu_ptr_;
67 }
69 inline void* SyncedMemory::mutable_cpu_data() {
70 to_cpu();
71 head_ = HEAD_AT_CPU;
72 return cpu_ptr_;
73 }
75 inline void* SyncedMemory::mutable_gpu_data() {
76 to_gpu();
77 head_ = HEAD_AT_GPU;
78 return gpu_ptr_;
79 }
82 } // namespace caffeine