summary | shortlog | log | commit | commitdiff | tree
raw | patch | inline | side by side (parent: a505417)
author | Sergei Nikolaev <snikolaev@nvidia.com> | |
Fri, 16 Mar 2018 01:26:55 +0000 (18:26 -0700) | ||
committer | Sergei Nikolaev <snikolaev@nvidia.com> | |
Fri, 16 Mar 2018 01:26:55 +0000 (18:26 -0700) |
examples/faceboxes/SSD.prototxt | [new file with mode: 0644] | patch | blob |
examples/faceboxes/faceboxes_test.py | [new file with mode: 0644] | patch | blob |
examples/faceboxes/pepper.jpg | [new file with mode: 0644] | patch | blob |
src/caffe/common.cpp | patch | blob | history | |
src/caffe/layers/smooth_L1_loss_layer.cu | patch | blob | history | |
src/caffe/net.cpp | patch | blob | history | |
src/caffe/util/bbox_util.cu | patch | blob | history |
diff --git a/examples/faceboxes/SSD.prototxt b/examples/faceboxes/SSD.prototxt
new file mode 100644 (file)
--- /dev/null
+++ b/examples/faceboxes/SSD.prototxt
@@ -0,0 +1,1540 @@
+name: "VGG_WIDER_FACE_SFD_deploy"
+
+default_forward_type: FLOAT16
+default_backward_type: FLOAT16
+default_forward_math: FLOAT16
+default_backward_math: FLOAT16
+
+#global_grad_scale: 10
+#global_grad_scale_adaptive: true
+
+input: "data"
+input_shape {
+ dim: 1
+ dim: 3
+ dim: 1080
+ dim: 1920
+}
+layer {
+ name: "conv1_1"
+ type: "Convolution"
+ bottom: "data"
+ top: "conv1_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 1
+ kernel_size: 3
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0
+ }
+ }
+}
+layer {
+ name: "relu1_1"
+ type: "ReLU"
+ bottom: "conv1_1"
+ top: "conv1_1"
+}
+layer {
+ name: "conv1_2"
+ type: "Convolution"
+ bottom: "conv1_1"
+ top: "conv1_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 1
+ kernel_size: 3
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0
+ }
+ }
+}
+layer {
+ name: "relu1_2"
+ type: "ReLU"
+ bottom: "conv1_2"
+ top: "conv1_2"
+}
+layer {
+ name: "pool1"
+ type: "Pooling"
+ bottom: "conv1_2"
+ top: "pool1"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv2_1"
+ type: "Convolution"
+ bottom: "pool1"
+ top: "conv2_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0
+ }
+ }
+}
+layer {
+ name: "relu2_1"
+ type: "ReLU"
+ bottom: "conv2_1"
+ top: "conv2_1"
+}
+layer {
+ name: "conv2_2"
+ type: "Convolution"
+ bottom: "conv2_1"
+ top: "conv2_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0
+ }
+ }
+}
+layer {
+ name: "relu2_2"
+ type: "ReLU"
+ bottom: "conv2_2"
+ top: "conv2_2"
+}
+layer {
+ name: "pool2"
+ type: "Pooling"
+ bottom: "conv2_2"
+ top: "pool2"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv3_1"
+ type: "Convolution"
+ bottom: "pool2"
+ top: "conv3_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0
+ }
+ }
+}
+layer {
+ name: "relu3_1"
+ type: "ReLU"
+ bottom: "conv3_1"
+ top: "conv3_1"
+}
+layer {
+ name: "conv3_2"
+ type: "Convolution"
+ bottom: "conv3_1"
+ top: "conv3_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0
+ }
+ }
+}
+layer {
+ name: "relu3_2"
+ type: "ReLU"
+ bottom: "conv3_2"
+ top: "conv3_2"
+}
+layer {
+ name: "conv3_3"
+ type: "Convolution"
+ bottom: "conv3_2"
+ top: "conv3_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0
+ }
+ }
+}
+layer {
+ name: "relu3_3"
+ type: "ReLU"
+ bottom: "conv3_3"
+ top: "conv3_3"
+}
+layer {
+ name: "pool3"
+ type: "Pooling"
+ bottom: "conv3_3"
+ top: "pool3"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv4_1"
+ type: "Convolution"
+ bottom: "pool3"
+ top: "conv4_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0
+ }
+ }
+}
+layer {
+ name: "relu4_1"
+ type: "ReLU"
+ bottom: "conv4_1"
+ top: "conv4_1"
+}
+layer {
+ name: "conv4_2"
+ type: "Convolution"
+ bottom: "conv4_1"
+ top: "conv4_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0
+ }
+ }
+}
+layer {
+ name: "relu4_2"
+ type: "ReLU"
+ bottom: "conv4_2"
+ top: "conv4_2"
+}
+layer {
+ name: "conv4_3"
+ type: "Convolution"
+ bottom: "conv4_2"
+ top: "conv4_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0
+ }
+ }
+}
+layer {
+ name: "relu4_3"
+ type: "ReLU"
+ bottom: "conv4_3"
+ top: "conv4_3"
+}
+layer {
+ name: "pool4"
+ type: "Pooling"
+ bottom: "conv4_3"
+ top: "pool4"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "conv5_1"
+ type: "Convolution"
+ bottom: "pool4"
+ top: "conv5_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0
+ }
+ dilation: 1
+ }
+}
+layer {
+ name: "relu5_1"
+ type: "ReLU"
+ bottom: "conv5_1"
+ top: "conv5_1"
+}
+layer {
+ name: "conv5_2"
+ type: "Convolution"
+ bottom: "conv5_1"
+ top: "conv5_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0
+ }
+ dilation: 1
+ }
+}
+layer {
+ name: "relu5_2"
+ type: "ReLU"
+ bottom: "conv5_2"
+ top: "conv5_2"
+}
+layer {
+ name: "conv5_3"
+ type: "Convolution"
+ bottom: "conv5_2"
+ top: "conv5_3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0
+ }
+ dilation: 1
+ }
+}
+layer {
+ name: "relu5_3"
+ type: "ReLU"
+ bottom: "conv5_3"
+ top: "conv5_3"
+}
+layer {
+ name: "pool5"
+ type: "Pooling"
+ bottom: "conv5_3"
+ top: "pool5"
+ pooling_param {
+ pool: MAX
+ kernel_size: 2
+ stride: 2
+ }
+}
+layer {
+ name: "fc6"
+ type: "Convolution"
+ bottom: "pool5"
+ top: "fc6"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 1024
+ pad: 3
+ kernel_size: 3
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0
+ }
+ dilation: 1
+ }
+}
+layer {
+ name: "relu6"
+ type: "ReLU"
+ bottom: "fc6"
+ top: "fc6"
+}
+layer {
+ name: "fc7"
+ type: "Convolution"
+ bottom: "fc6"
+ top: "fc7"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 1024
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0
+ }
+ }
+}
+layer {
+ name: "relu7"
+ type: "ReLU"
+ bottom: "fc7"
+ top: "fc7"
+}
+layer {
+ name: "conv6_1"
+ type: "Convolution"
+ bottom: "fc7"
+ top: "conv6_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 0
+ kernel_size: 1
+ stride: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0
+ }
+ }
+}
+layer {
+ name: "conv6_1_relu"
+ type: "ReLU"
+ bottom: "conv6_1"
+ top: "conv6_1"
+}
+layer {
+ name: "conv6_2"
+ type: "Convolution"
+ bottom: "conv6_1"
+ top: "conv6_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 512
+ pad: 1
+ kernel_size: 3
+ stride: 2
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0
+ }
+ }
+}
+layer {
+ name: "conv6_2_relu"
+ type: "ReLU"
+ bottom: "conv6_2"
+ top: "conv6_2"
+}
+layer {
+ name: "conv7_1"
+ type: "Convolution"
+ bottom: "conv6_2"
+ top: "conv7_1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 0
+ kernel_size: 1
+ stride: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0
+ }
+ }
+}
+layer {
+ name: "conv7_1_relu"
+ type: "ReLU"
+ bottom: "conv7_1"
+ top: "conv7_1"
+}
+layer {
+ name: "conv7_2"
+ type: "Convolution"
+ bottom: "conv7_1"
+ top: "conv7_2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ stride: 2
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0
+ }
+ }
+}
+layer {
+ name: "conv7_2_relu"
+ type: "ReLU"
+ bottom: "conv7_2"
+ top: "conv7_2"
+}
+layer {
+ name: "conv3_3_norm"
+ type: "Normalize"
+ bottom: "conv3_3"
+ top: "conv3_3_norm"
+ norm_param {
+ across_spatial: false
+ scale_filler {
+ type: "constant"
+ value: 10
+ }
+ channel_shared: false
+ }
+}
+layer {
+ name: "conv3_3_norm_mbox_loc"
+ type: "Convolution"
+ bottom: "conv3_3_norm"
+ top: "conv3_3_norm_mbox_loc"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0
+ }
+ }
+}
+layer {
+ name: "conv3_3_norm_mbox_loc_perm"
+ type: "Permute"
+ bottom: "conv3_3_norm_mbox_loc"
+ top: "conv3_3_norm_mbox_loc_perm"
+ permute_param {
+ order: 0
+ order: 2
+ order: 3
+ order: 1
+ }
+}
+layer {
+ name: "conv3_3_norm_mbox_loc_flat"
+ type: "Flatten"
+ bottom: "conv3_3_norm_mbox_loc_perm"
+ top: "conv3_3_norm_mbox_loc_flat"
+ flatten_param {
+ axis: 1
+ }
+}
+layer {
+ name: "conv3_3_norm_mbox_conf"
+ type: "Convolution"
+ bottom: "conv3_3_norm"
+ top: "conv3_3_norm_mbox_conf"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0
+ }
+ }
+}
+layer {
+ name: "conv3_3_norm_mbox_conf_slice"
+ type: "Slice"
+ bottom: "conv3_3_norm_mbox_conf"
+ top: "conv3_3_norm_mbox_conf1"
+ top: "conv3_3_norm_mbox_conf2"
+ top: "conv3_3_norm_mbox_conf3"
+ top: "conv3_3_norm_mbox_conf4"
+ slice_param {
+ axis: 1
+ slice_point: 1
+ slice_point: 2
+ slice_point: 3
+ }
+}
+layer {
+ name: "conv3_3_norm_mbox_conf_maxout"
+ type: "Eltwise"
+ bottom: "conv3_3_norm_mbox_conf1"
+ bottom: "conv3_3_norm_mbox_conf2"
+ bottom: "conv3_3_norm_mbox_conf3"
+ top: "conv3_3_norm_mbox_conf_maxout"
+ eltwise_param {
+ operation: MAX
+ }
+}
+layer {
+ name: "conv3_3_norm_mbox_conf_out"
+ type: "Concat"
+ bottom: "conv3_3_norm_mbox_conf_maxout"
+ bottom: "conv3_3_norm_mbox_conf4"
+ top: "conv3_3_norm_mbox_conf_out"
+ concat_param {
+ axis: 1
+ }
+}
+layer {
+ name: "conv3_3_norm_mbox_conf_perm"
+ type: "Permute"
+ bottom: "conv3_3_norm_mbox_conf_out"
+ top: "conv3_3_norm_mbox_conf_perm"
+ permute_param {
+ order: 0
+ order: 2
+ order: 3
+ order: 1
+ }
+}
+layer {
+ name: "conv3_3_norm_mbox_conf_flat"
+ type: "Flatten"
+ bottom: "conv3_3_norm_mbox_conf_perm"
+ top: "conv3_3_norm_mbox_conf_flat"
+ flatten_param {
+ axis: 1
+ }
+}
+layer {
+ name: "conv3_3_norm_mbox_priorbox"
+ type: "PriorBox"
+ bottom: "conv3_3_norm"
+ bottom: "data"
+ top: "conv3_3_norm_mbox_priorbox"
+ prior_box_param {
+ min_size: 16
+ clip: false
+ variance: 0.1
+ variance: 0.1
+ variance: 0.2
+ variance: 0.2
+ step: 4
+ offset: 0.5
+ }
+}
+layer {
+ name: "conv4_3_norm"
+ type: "Normalize"
+ bottom: "conv4_3"
+ top: "conv4_3_norm"
+ norm_param {
+ across_spatial: false
+ scale_filler {
+ type: "constant"
+ value: 8
+ }
+ channel_shared: false
+ }
+}
+layer {
+ name: "conv4_3_norm_mbox_loc"
+ type: "Convolution"
+ bottom: "conv4_3_norm"
+ top: "conv4_3_norm_mbox_loc"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0
+ }
+ }
+}
+layer {
+ name: "conv4_3_norm_mbox_loc_perm"
+ type: "Permute"
+ bottom: "conv4_3_norm_mbox_loc"
+ top: "conv4_3_norm_mbox_loc_perm"
+ permute_param {
+ order: 0
+ order: 2
+ order: 3
+ order: 1
+ }
+}
+layer {
+ name: "conv4_3_norm_mbox_loc_flat"
+ type: "Flatten"
+ bottom: "conv4_3_norm_mbox_loc_perm"
+ top: "conv4_3_norm_mbox_loc_flat"
+ flatten_param {
+ axis: 1
+ }
+}
+layer {
+ name: "conv4_3_norm_mbox_conf"
+ type: "Convolution"
+ bottom: "conv4_3_norm"
+ top: "conv4_3_norm_mbox_conf"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 2
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0
+ }
+ }
+}
+layer {
+ name: "conv4_3_norm_mbox_conf_perm"
+ type: "Permute"
+ bottom: "conv4_3_norm_mbox_conf"
+ top: "conv4_3_norm_mbox_conf_perm"
+ permute_param {
+ order: 0
+ order: 2
+ order: 3
+ order: 1
+ }
+}
+layer {
+ name: "conv4_3_norm_mbox_conf_flat"
+ type: "Flatten"
+ bottom: "conv4_3_norm_mbox_conf_perm"
+ top: "conv4_3_norm_mbox_conf_flat"
+ flatten_param {
+ axis: 1
+ }
+}
+layer {
+ name: "conv4_3_norm_mbox_priorbox"
+ type: "PriorBox"
+ bottom: "conv4_3_norm"
+ bottom: "data"
+ top: "conv4_3_norm_mbox_priorbox"
+ prior_box_param {
+ min_size: 32
+ clip: false
+ variance: 0.1
+ variance: 0.1
+ variance: 0.2
+ variance: 0.2
+ step: 8
+ offset: 0.5
+ }
+}
+layer {
+ name: "conv5_3_norm"
+ type: "Normalize"
+ bottom: "conv5_3"
+ top: "conv5_3_norm"
+ norm_param {
+ across_spatial: false
+ scale_filler {
+ type: "constant"
+ value: 5
+ }
+ channel_shared: false
+ }
+}
+layer {
+ name: "conv5_3_norm_mbox_loc"
+ type: "Convolution"
+ bottom: "conv5_3_norm"
+ top: "conv5_3_norm_mbox_loc"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0
+ }
+ }
+}
+layer {
+ name: "conv5_3_norm_mbox_loc_perm"
+ type: "Permute"
+ bottom: "conv5_3_norm_mbox_loc"
+ top: "conv5_3_norm_mbox_loc_perm"
+ permute_param {
+ order: 0
+ order: 2
+ order: 3
+ order: 1
+ }
+}
+layer {
+ name: "conv5_3_norm_mbox_loc_flat"
+ type: "Flatten"
+ bottom: "conv5_3_norm_mbox_loc_perm"
+ top: "conv5_3_norm_mbox_loc_flat"
+ flatten_param {
+ axis: 1
+ }
+}
+layer {
+ name: "conv5_3_norm_mbox_conf"
+ type: "Convolution"
+ bottom: "conv5_3_norm"
+ top: "conv5_3_norm_mbox_conf"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 2
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0
+ }
+ }
+}
+layer {
+ name: "conv5_3_norm_mbox_conf_perm"
+ type: "Permute"
+ bottom: "conv5_3_norm_mbox_conf"
+ top: "conv5_3_norm_mbox_conf_perm"
+ permute_param {
+ order: 0
+ order: 2
+ order: 3
+ order: 1
+ }
+}
+layer {
+ name: "conv5_3_norm_mbox_conf_flat"
+ type: "Flatten"
+ bottom: "conv5_3_norm_mbox_conf_perm"
+ top: "conv5_3_norm_mbox_conf_flat"
+ flatten_param {
+ axis: 1
+ }
+}
+layer {
+ name: "conv5_3_norm_mbox_priorbox"
+ type: "PriorBox"
+ bottom: "conv5_3_norm"
+ bottom: "data"
+ top: "conv5_3_norm_mbox_priorbox"
+ prior_box_param {
+ min_size: 64
+ clip: false
+ variance: 0.1
+ variance: 0.1
+ variance: 0.2
+ variance: 0.2
+ step: 16
+ offset: 0.5
+ }
+}
+layer {
+ name: "fc7_mbox_loc"
+ type: "Convolution"
+ bottom: "fc7"
+ top: "fc7_mbox_loc"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0
+ }
+ }
+}
+layer {
+ name: "fc7_mbox_loc_perm"
+ type: "Permute"
+ bottom: "fc7_mbox_loc"
+ top: "fc7_mbox_loc_perm"
+ permute_param {
+ order: 0
+ order: 2
+ order: 3
+ order: 1
+ }
+}
+layer {
+ name: "fc7_mbox_loc_flat"
+ type: "Flatten"
+ bottom: "fc7_mbox_loc_perm"
+ top: "fc7_mbox_loc_flat"
+ flatten_param {
+ axis: 1
+ }
+}
+layer {
+ name: "fc7_mbox_conf"
+ type: "Convolution"
+ bottom: "fc7"
+ top: "fc7_mbox_conf"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 2
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0
+ }
+ }
+}
+layer {
+ name: "fc7_mbox_conf_perm"
+ type: "Permute"
+ bottom: "fc7_mbox_conf"
+ top: "fc7_mbox_conf_perm"
+ permute_param {
+ order: 0
+ order: 2
+ order: 3
+ order: 1
+ }
+}
+layer {
+ name: "fc7_mbox_conf_flat"
+ type: "Flatten"
+ bottom: "fc7_mbox_conf_perm"
+ top: "fc7_mbox_conf_flat"
+ flatten_param {
+ axis: 1
+ }
+}
+layer {
+ name: "fc7_mbox_priorbox"
+ type: "PriorBox"
+ bottom: "fc7"
+ bottom: "data"
+ top: "fc7_mbox_priorbox"
+ prior_box_param {
+ min_size: 128
+ clip: false
+ variance: 0.1
+ variance: 0.1
+ variance: 0.2
+ variance: 0.2
+ step: 32
+ offset: 0.5
+ }
+}
+layer {
+ name: "conv6_2_mbox_loc"
+ type: "Convolution"
+ bottom: "conv6_2"
+ top: "conv6_2_mbox_loc"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0
+ }
+ }
+}
+layer {
+ name: "conv6_2_mbox_loc_perm"
+ type: "Permute"
+ bottom: "conv6_2_mbox_loc"
+ top: "conv6_2_mbox_loc_perm"
+ permute_param {
+ order: 0
+ order: 2
+ order: 3
+ order: 1
+ }
+}
+layer {
+ name: "conv6_2_mbox_loc_flat"
+ type: "Flatten"
+ bottom: "conv6_2_mbox_loc_perm"
+ top: "conv6_2_mbox_loc_flat"
+ flatten_param {
+ axis: 1
+ }
+}
+layer {
+ name: "conv6_2_mbox_conf"
+ type: "Convolution"
+ bottom: "conv6_2"
+ top: "conv6_2_mbox_conf"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 2
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0
+ }
+ }
+}
+layer {
+ name: "conv6_2_mbox_conf_perm"
+ type: "Permute"
+ bottom: "conv6_2_mbox_conf"
+ top: "conv6_2_mbox_conf_perm"
+ permute_param {
+ order: 0
+ order: 2
+ order: 3
+ order: 1
+ }
+}
+layer {
+ name: "conv6_2_mbox_conf_flat"
+ type: "Flatten"
+ bottom: "conv6_2_mbox_conf_perm"
+ top: "conv6_2_mbox_conf_flat"
+ flatten_param {
+ axis: 1
+ }
+}
+layer {
+ name: "conv6_2_mbox_priorbox"
+ type: "PriorBox"
+ bottom: "conv6_2"
+ bottom: "data"
+ top: "conv6_2_mbox_priorbox"
+ prior_box_param {
+ min_size: 256
+ clip: false
+ variance: 0.1
+ variance: 0.1
+ variance: 0.2
+ variance: 0.2
+ step: 64
+ offset: 0.5
+ }
+}
+layer {
+ name: "conv7_2_mbox_loc"
+ type: "Convolution"
+ bottom: "conv7_2"
+ top: "conv7_2_mbox_loc"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 4
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0
+ }
+ }
+}
+layer {
+ name: "conv7_2_mbox_loc_perm"
+ type: "Permute"
+ bottom: "conv7_2_mbox_loc"
+ top: "conv7_2_mbox_loc_perm"
+ permute_param {
+ order: 0
+ order: 2
+ order: 3
+ order: 1
+ }
+}
+layer {
+ name: "conv7_2_mbox_loc_flat"
+ type: "Flatten"
+ bottom: "conv7_2_mbox_loc_perm"
+ top: "conv7_2_mbox_loc_flat"
+ flatten_param {
+ axis: 1
+ }
+}
+layer {
+ name: "conv7_2_mbox_conf"
+ type: "Convolution"
+ bottom: "conv7_2"
+ top: "conv7_2_mbox_conf"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 2
+ pad: 1
+ kernel_size: 3
+ stride: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0
+ }
+ }
+}
+layer {
+ name: "conv7_2_mbox_conf_perm"
+ type: "Permute"
+ bottom: "conv7_2_mbox_conf"
+ top: "conv7_2_mbox_conf_perm"
+ permute_param {
+ order: 0
+ order: 2
+ order: 3
+ order: 1
+ }
+}
+layer {
+ name: "conv7_2_mbox_conf_flat"
+ type: "Flatten"
+ bottom: "conv7_2_mbox_conf_perm"
+ top: "conv7_2_mbox_conf_flat"
+ flatten_param {
+ axis: 1
+ }
+}
+layer {
+ name: "conv7_2_mbox_priorbox"
+ type: "PriorBox"
+ bottom: "conv7_2"
+ bottom: "data"
+ top: "conv7_2_mbox_priorbox"
+ prior_box_param {
+ min_size: 512
+ clip: false
+ variance: 0.1
+ variance: 0.1
+ variance: 0.2
+ variance: 0.2
+ step: 128
+ offset: 0.5
+ }
+}
+layer {
+ name: "mbox_loc"
+ type: "Concat"
+ bottom: "conv3_3_norm_mbox_loc_flat"
+ bottom: "conv4_3_norm_mbox_loc_flat"
+ bottom: "conv5_3_norm_mbox_loc_flat"
+ bottom: "fc7_mbox_loc_flat"
+ bottom: "conv6_2_mbox_loc_flat"
+ bottom: "conv7_2_mbox_loc_flat"
+ top: "mbox_loc"
+ concat_param {
+ axis: 1
+ }
+}
+layer {
+ name: "mbox_conf"
+ type: "Concat"
+ bottom: "conv3_3_norm_mbox_conf_flat"
+ bottom: "conv4_3_norm_mbox_conf_flat"
+ bottom: "conv5_3_norm_mbox_conf_flat"
+ bottom: "fc7_mbox_conf_flat"
+ bottom: "conv6_2_mbox_conf_flat"
+ bottom: "conv7_2_mbox_conf_flat"
+ top: "mbox_conf"
+ concat_param {
+ axis: 1
+ }
+}
+layer {
+ name: "mbox_priorbox"
+ type: "Concat"
+ bottom: "conv3_3_norm_mbox_priorbox"
+ bottom: "conv4_3_norm_mbox_priorbox"
+ bottom: "conv5_3_norm_mbox_priorbox"
+ bottom: "fc7_mbox_priorbox"
+ bottom: "conv6_2_mbox_priorbox"
+ bottom: "conv7_2_mbox_priorbox"
+ top: "mbox_priorbox"
+ concat_param {
+ axis: 2
+ }
+}
+layer {
+ name: "mbox_conf_reshape"
+ type: "Reshape"
+ bottom: "mbox_conf"
+ top: "mbox_conf_reshape"
+ reshape_param {
+ shape {
+ dim: 0
+ dim: -1
+ dim: 2
+ }
+ }
+}
+layer {
+ name: "mbox_conf_softmax"
+ type: "Softmax"
+ bottom: "mbox_conf_reshape"
+ top: "mbox_conf_softmax"
+ softmax_param {
+ axis: 2
+ }
+}
+layer {
+ name: "mbox_conf_flatten"
+ type: "Flatten"
+ bottom: "mbox_conf_softmax"
+ top: "mbox_conf_flatten"
+ flatten_param {
+ axis: 1
+ }
+}
+layer {
+ name: "detection_out"
+ type: "DetectionOutput"
+ bottom: "mbox_loc"
+ bottom: "mbox_conf_flatten"
+ bottom: "mbox_priorbox"
+ top: "detection_out"
+ include {
+ phase: TEST
+ }
+ detection_output_param {
+ num_classes: 2
+ share_location: true
+ background_label_id: 0
+ nms_param {
+ nms_threshold: 0.3
+ top_k: 5000
+ }
+ code_type: CENTER_SIZE
+ keep_top_k: 750
+ confidence_threshold: 0.6
+ }
+}
+
diff --git a/examples/faceboxes/faceboxes_test.py b/examples/faceboxes/faceboxes_test.py
new file mode 100644 (file)
--- /dev/null
+++ b/examples/faceboxes/faceboxes_test.py
@@ -0,0 +1,61 @@
+import numpy as np
+import sys, os
+import cv2
+
+sys.path.insert(0, '../../python')
+import caffe
+import time
+
+net_file = 'SSD.prototxt'
+caffe_model = 'SSD.caffemodel'
+test_dir = "images"
+
+if not os.path.exists(caffe_model):
+ print("SSD.caffemodel does not exist, see https://github.com/sfzhang15/SFD")
+ exit()
+caffe.set_mode_gpu()
+net = caffe.Net(net_file, caffe_model, caffe.TEST)
+
+CLASSES = ('background',
+ 'face')
+
+transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
+transformer.set_transpose('data', (2, 0, 1))
+transformer.set_mean('data', np.array([104, 117, 123])) # mean pixel
+
+
+def postprocess(img, out):
+ h = img.shape[0]
+ w = img.shape[1]
+ box = out['detection_out'][0, 0, :, 3:7] * np.array([w, h, w, h])
+ cls = out['detection_out'][0, 0, :, 1]
+ conf = out['detection_out'][0, 0, :, 2]
+ return (box.astype(np.int32), conf, cls)
+
+
+def detect(imgfile):
+ frame = cv2.imread(imgfile)
+ transformed_image = transformer.preprocess('data', frame)
+ net.blobs['data'].data[...] = transformed_image
+ time_start = time.time()
+ out = net.forward()
+ time_end = time.time()
+ print (time_end - time_start),
+ print ("s")
+
+ box, conf, cls = postprocess(frame, out)
+
+ for i in range(len(box)):
+ p1 = (box[i][0], box[i][1])
+ p2 = (box[i][2], box[i][3])
+ cv2.rectangle(frame, p1, p2, (0, 255, 0))
+ p3 = (max(p1[0], 15), max(p1[1], 15))
+ title = "%s:%.2f" % (CLASSES[int(cls[i])], conf[i])
+ cv2.putText(frame, title, p3, cv2.FONT_ITALIC, 0.6, (0, 255, 0), 1)
+ cv2.imshow("SSD, %d boxes" % len(box), frame)
+ cv2.waitKey()
+ # if cv2.waitKey(100) & 0xFF == ord('q'):
+ # break
+
+
+detect("pepper.jpg")
diff --git a/examples/faceboxes/pepper.jpg b/examples/faceboxes/pepper.jpg
new file mode 100644 (file)
index 0000000..c019ed4
Binary files /dev/null and b/examples/faceboxes/pepper.jpg differ
diff --git a/src/caffe/common.cpp b/src/caffe/common.cpp
index 1d48063a93c09bc7876ce8f029465b03120bb39a..f5a1a2f7812aa068d2a64b72815a1568f09ba5c3 100644 (file)
--- a/src/caffe/common.cpp
+++ b/src/caffe/common.cpp
void Caffe::init() {
if (mode_ == GPU && curand_generator_ == nullptr) {
curand_stream_ = CudaStream::create();
- CURAND_CHECK_ARG(curandCreateGenerator(&curand_generator_, CURAND_RNG_PSEUDO_DEFAULT), device_);
- CURAND_CHECK_ARG(curandSetPseudoRandomGeneratorSeed(curand_generator_, cluster_seedgen()), device_);
- CURAND_CHECK_ARG(curandSetStream(curand_generator_, curand_stream_->get()), device_);
+ CURAND_CHECK(curandCreateGenerator(&curand_generator_, CURAND_RNG_PSEUDO_DEFAULT));
+ CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(curand_generator_, cluster_seedgen()));
+ CURAND_CHECK(curandSetStream(curand_generator_, curand_stream_->get()));
}
}
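diff --git a/src/caffe/layers/smooth_L1_loss_layer.cu b/src/caffe/layers/smooth_L1_loss_layer.cu
index 244766c03c19bc76d774116fd1677fc20c75985d..678e9cc0b07aa23ad3281bb22e0395b5a41a410b 100644 (file)
--- a/src/caffe/layers/smooth_L1_loss_layer.cu
+++ b/src/caffe/layers/smooth_L1_loss_layer.cu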
diff_.gpu_data(),
diff_.mutable_gpu_data()); // d := w * (b0 - b1)
}
+ cudaStream_t stream = Caffe::thread_stream();
// NOLINT_NEXT_LINE(whitespace/operators)
- SmoothL1Forward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
+ SmoothL1Forward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS, 0, stream>>>(
count, diff_.gpu_data(), errors_.mutable_gpu_data());
+ CUDA_CHECK(cudaStreamSynchronize(stream));
CUDA_POST_KERNEL_CHECK;
Dtype loss;
void SmoothL1LossLayer<Ftype, Btype>::Backward_gpu(const vector<Blob*>& top,
const vector<bool>& propagate_down, const vector<Blob*>& bottom) {
int count = diff_.count();
+ cudaStream_t stream = Caffe::thread_stream();
// NOLINT_NEXT_LINE(whitespace/operators)
- SmoothL1Backward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
+ SmoothL1Backward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS, 0, stream>>>(
count, diff_.gpu_data(), diff_.mutable_gpu_data());
CUDA_POST_KERNEL_CHECK;
+ CUDA_CHECK(cudaStreamSynchronize(stream));
for (int i = 0; i < 2; ++i) {
if (propagate_down[i]) {
const Dtype sign = (i == 0) ? 1 : -1;
diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp
index 6b2822af0151835abb3f4bb2eb590fa809cf503f..1e27a3b5cfe7708c087be459056f5d092a4484fe 100644 (file)
--- a/src/caffe/net.cpp
+++ b/src/caffe/net.cpp
if (global_grad_scale_adaptive_) {
const float wgsq = wgrad_sq();
if (wgsq > 0.F) {
- global_grad_scale_coeff_ = std::sqrt(std::max(wgsq, 1.F / Caffe::solver_count())) * global_grad_scale_param_;
+ global_grad_scale_coeff_ = std::sqrt(wgsq) * global_grad_scale_param_;
return;
}
}
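diff --git a/src/caffe/util/bbox_util.cu b/src/caffe/util/bbox_util.cu
index 460a222e528c19ea9df4f706da6f5dbadf0e5b36..5a4a6eb580743fd6e14c91fbdadf05fd2e131c6a 100644 (file)
--- a/src/caffe/util/bbox_util.cu
+++ b/src/caffe/util/bbox_util.cu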
const int num_priors, const bool share_location,
const int num_loc_classes, const int background_label_id,
const bool clip_bbox, Dtype* bbox_data) {
+ cudaStream_t stream = Caffe::thread_stream();
// NOLINT_NEXT_LINE(whitespace/operators)
- DecodeBBoxesKernel<Dtype><<<CAFFE_GET_BLOCKS(nthreads),
- CAFFE_CUDA_NUM_THREADS>>>(nthreads, loc_data, prior_data, code_type,
- variance_encoded_in_target, num_priors, share_location, num_loc_classes,
- background_label_id, clip_bbox, bbox_data);
+ DecodeBBoxesKernel<Dtype><<<CAFFE_GET_BLOCKS(nthreads), CAFFE_CUDA_NUM_THREADS, 0, stream>>>
+ (nthreads, loc_data, prior_data, code_type, variance_encoded_in_target, num_priors,
+ share_location, num_loc_classes, background_label_id, clip_bbox, bbox_data);
CUDA_POST_KERNEL_CHECK;
+ CUDA_CHECK(cudaStreamSynchronize(stream));
}
template void DecodeBBoxesGPU(const int nthreads,
void PermuteDataGPU(const int nthreads,
const Dtype* data, const int num_classes, const int num_data,
const int num_dim, Dtype* new_data) {
+ cudaStream_t stream = Caffe::thread_stream();
// NOLINT_NEXT_LINE(whitespace/operators)
PermuteDataKernel<Dtype><<<CAFFE_GET_BLOCKS(nthreads),
- CAFFE_CUDA_NUM_THREADS>>>(nthreads, data, num_classes, num_data,
+ CAFFE_CUDA_NUM_THREADS, 0, stream>>>(nthreads, data, num_classes, num_data,
num_dim, new_data);
CUDA_POST_KERNEL_CHECK;
+ CUDA_CHECK(cudaStreamSynchronize(stream));
}
template void PermuteDataGPU(const int nthreads,
TBlob<Dtype> scale(shape);
Dtype* scale_data = scale.mutable_gpu_data();
int count = outer_num * channels * inner_num;
+ cudaStream_t stream = Caffe::thread_stream();
// We need to subtract the max to avoid numerical issues, compute the exp,
// and then normalize.
// compute max
// NOLINT_NEXT_LINE(whitespace/operators)
kernel_channel_max<Dtype><<<CAFFE_GET_BLOCKS(outer_num * inner_num),
- CAFFE_CUDA_NUM_THREADS>>>(outer_num, channels, inner_num, data,
+ CAFFE_CUDA_NUM_THREADS, 0, stream>>>(outer_num, channels, inner_num, data,
scale_data);
// subtract
// NOLINT_NEXT_LINE(whitespace/operators)
kernel_channel_subtract<Dtype><<<CAFFE_GET_BLOCKS(count),
- CAFFE_CUDA_NUM_THREADS>>>(count, outer_num, channels, inner_num,
+ CAFFE_CUDA_NUM_THREADS, 0, stream>>>(count, outer_num, channels, inner_num,
data, scale_data, prob);
// exponentiate
// NOLINT_NEXT_LINE(whitespace/operators)
- kernel_exp<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
+ kernel_exp<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS, 0, stream>>>(
count, prob, prob);
// sum after exp
// NOLINT_NEXT_LINE(whitespace/operators)
kernel_channel_sum<Dtype><<<CAFFE_GET_BLOCKS(outer_num * inner_num),
- CAFFE_CUDA_NUM_THREADS>>>(outer_num, channels, inner_num, prob,
+ CAFFE_CUDA_NUM_THREADS, 0, stream>>>(outer_num, channels, inner_num, prob,
scale_data);
// divide
// NOLINT_NEXT_LINE(whitespace/operators)
kernel_channel_div<Dtype><<<CAFFE_GET_BLOCKS(count),
- CAFFE_CUDA_NUM_THREADS>>>(count, outer_num, channels, inner_num,
+ CAFFE_CUDA_NUM_THREADS, 0, stream>>>(count, outer_num, channels, inner_num,
scale_data, prob);
+ CUDA_CHECK(cudaStreamSynchronize(stream));
}
template void SoftMaxGPU(const float* data, const int outer_num,
void ComputeOverlappedGPU(const int nthreads,
const Dtype* bbox_data, const int num_bboxes, const int num_classes,
const Dtype overlap_threshold, bool* overlapped_data) {
+ cudaStream_t stream = Caffe::thread_stream();
// NOLINT_NEXT_LINE(whitespace/operators)
ComputeOverlappedKernel<Dtype><<<CAFFE_GET_BLOCKS(nthreads),
- CAFFE_CUDA_NUM_THREADS>>>(nthreads, bbox_data, num_bboxes, num_classes,
+ CAFFE_CUDA_NUM_THREADS, 0, stream>>>(nthreads, bbox_data, num_bboxes, num_classes,
overlap_threshold, overlapped_data);
CUDA_POST_KERNEL_CHECK;
+ CUDA_CHECK(cudaStreamSynchronize(stream));
}
template void ComputeOverlappedGPU(const int nthreads,
void ComputeOverlappedByIdxGPU(const int nthreads,
const Dtype* bbox_data, const Dtype overlap_threshold,
const int* idx, const int num_idx, bool* overlapped_data) {
+ cudaStream_t stream = Caffe::thread_stream();
// NOLINT_NEXT_LINE(whitespace/operators)
ComputeOverlappedByIdxKernel<Dtype><<<CAFFE_GET_BLOCKS(nthreads),
- CAFFE_CUDA_NUM_THREADS>>>(nthreads, bbox_data, overlap_threshold,
+ CAFFE_CUDA_NUM_THREADS, 0, stream>>>(nthreads, bbox_data, overlap_threshold,
idx, num_idx, overlapped_data);
CUDA_POST_KERNEL_CHECK;
+ CUDA_CHECK(cudaStreamSynchronize(stream));
}
template void ComputeOverlappedByIdxGPU(const int nthreads,
// Prepare detection_blob.
detection_blob->Reshape(1, 1, num_det, 7);
Dtype* detection_data = detection_blob->mutable_gpu_data();
+ cudaStream_t stream = Caffe::thread_stream();
// NOLINT_NEXT_LINE(whitespace/operators)
GetDetectionsKernel<Dtype><<<CAFFE_GET_BLOCKS(num_det),
- CAFFE_CUDA_NUM_THREADS>>>(num_det, bbox_data, conf_data, image_id, label,
+ CAFFE_CUDA_NUM_THREADS, 0, stream>>>(num_det, bbox_data, conf_data, image_id, label,
idx_blob.gpu_data(), clip_bbox, detection_data);
CUDA_POST_KERNEL_CHECK;
+ CUDA_CHECK(cudaStreamSynchronize(stream));
}
template void GetDetectionsGPU(const float* bbox_data, const float* conf_data,
TBlob<Dtype> conf_loss_blob(num, num_preds_per_class, 1, 1);
Dtype* conf_loss_gpu_data = conf_loss_blob.mutable_gpu_data();
const int num_threads = num * num_preds_per_class;
+ cudaStream_t stream = Caffe::thread_stream();
// NOLINT_NEXT_LINE(whitespace/operators)
ComputeConfLossKernel<Dtype><<<CAFFE_GET_BLOCKS(num_threads),
- CAFFE_CUDA_NUM_THREADS>>>(num_threads, conf_gpu_data, num_preds_per_class,
+ CAFFE_CUDA_NUM_THREADS, 0, stream>>>(num_threads, conf_gpu_data, num_preds_per_class,
num_classes, loss_type, match_blob.gpu_data(), conf_loss_gpu_data);
+ CUDA_CHECK(cudaStreamSynchronize(stream));
// Save the loss.
all_conf_loss->clear();
const Dtype* loss_data = conf_loss_blob.cpu_data();