1 #include <cstring>
2 #include <cuda_runtime.h>
4 #include "caffeine/common.hpp"
5 #include "caffeine/syncedmem.hpp"
7 namespace caffeine {
9 SyncedMemory::~SyncedMemory() {
10 if (cpu_ptr_) {
11 CUDA_CHECK(cudaFreeHost(cpu_ptr_));
12 }
14 if (gpu_ptr_) {
15 CUDA_CHECK(cudaFree(gpu_ptr_));
16 }
17 }
19 inline void SyncedMemory::to_cpu() {
20 switch(head_) {
21 case UNINITIALIZED:
22 CUDA_CHECK(cudaMallocHost(&cpu_ptr_, size_));
23 memset(cpu_ptr_, 0, size_);
24 head_ = HEAD_AT_CPU;
25 break;
26 case HEAD_AT_GPU:
27 if (cpu_ptr_ == NULL) {
28 CUDA_CHECK(cudaMallocHost(&cpu_ptr_, size_));
29 }
30 CUDA_CHECK(cudaMemcpy(cpu_ptr_, gpu_ptr_, size_, cudaMemcpyDeviceToHost));
31 head_ = SYNCED;
32 break;
33 case HEAD_AT_CPU:
34 case SYNCED:
35 break;
36 }
37 }
39 inline void SyncedMemory::to_gpu() {
40 switch(head_) {
41 case UNINITIALIZED:
42 CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_));
43 CUDA_CHECK(cudaMemset(gpu_ptr_, 0, size_));
44 head_ = HEAD_AT_GPU;
45 break;
46 case HEAD_AT_CPU:
47 if (gpu_ptr_ == NULL) {
48 CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_));
49 }
50 CUDA_CHECK(cudaMemcpy(gpu_ptr_, cpu_ptr_, size_, cudaMemcpyHostToDevice));
51 head_ = SYNCED;
52 break;
53 case HEAD_AT_GPU:
54 case SYNCED:
55 break;
56 }
57 }
60 const void* SyncedMemory::cpu_data() {
61 to_cpu();
62 return (const void*)cpu_ptr_;
63 }
65 const void* SyncedMemory::gpu_data() {
66 to_gpu();
67 return (const void*)gpu_ptr_;
68 }
70 void* SyncedMemory::mutable_cpu_data() {
71 to_cpu();
72 head_ = HEAD_AT_CPU;
73 return cpu_ptr_;
74 }
76 void* SyncedMemory::mutable_gpu_data() {
77 to_gpu();
78 head_ = HEAD_AT_GPU;
79 return gpu_ptr_;
80 }
83 } // namespace caffeine