1 syntax = "proto2";
3 package caffe;
// Element types used for math and for storage.
enum Type {
  DOUBLE = 0;
  FLOAT = 1;
  FLOAT16 = 2;
  INT = 3;   // math is not supported for this type
  UINT = 4;  // math is not supported for this type
  BOOL = 5;  // math is not supported for this type
}
// Packing identifiers.
// NOTE(review): presumably the axis order of 4D data (N, C, H, W) -- confirm
// against the code that consumes this enum.
enum Packing {
  NCHW = 0;
  NHWC = 1;
}
// Shape of a Blob, expressed as the list of its dimensions.
message BlobShape {
  repeated int64 dim = 1 [packed = true];
}
// Serialized form of a blob: its shape plus data/diff payloads, which may be
// stored in float, double or raw-bytes containers.
message BlobProto {
  // Shape of the blob; replaces the deprecated 4D fields at the end of this
  // message.
  optional BlobShape shape = 7;

  // Single-precision containers.
  repeated float data = 5 [packed = true];
  repeated float diff = 6 [packed = true];

  // Double-precision containers.
  repeated double double_data = 8 [packed = true];
  repeated double double_diff = 9 [packed = true];

  // New raw storage (faster and takes 1/2 of space for FP16).
  // NOTE(review): raw_data_type / raw_diff_type presumably give the element
  // Type of the matching raw_data / raw_diff buffer -- confirm in the reader.
  optional Type raw_data_type = 10;
  optional Type raw_diff_type = 11;
  // Singular bytes fields: the `packed` option does not apply here (it is
  // only meaningful for repeated scalar numeric fields), so none is set.
  optional bytes raw_data = 12;
  optional bytes raw_diff = 13;

  // 4D dimensions -- deprecated. Use "shape" instead.
  optional int32 num = 1 [default = 0];
  optional int32 channels = 2 [default = 0];
  optional int32 height = 3 [default = 0];
  optional int32 width = 4 [default = 0];
}
// Container used to pass several BlobProto instances around together.
message BlobProtoVector {
  repeated BlobProto blobs = 1;
}
// A single data record: image dimensions, payload and an integer label.
message Datum {
  optional int32 channels = 1;
  optional int32 height = 2;
  optional int32 width = 3;

  // The actual image data, in bytes.
  optional bytes data = 4;
  optional int32 label = 5;

  // Optionally, the datum can hold float data as well.
  repeated float float_data = 6;

  // When true, `data` holds an encoded image that needs to be decoded.
  optional bool encoded = 7 [default = false];

  // Unique record index assigned by the Reader.
  optional uint32 record_id = 8 [default = 0];
}
// One label entry: its name, its integer id and an optional display name.
message LabelMapItem {
  // Both name and label are expected to be provided, even though the schema
  // declares them `optional`.
  optional string name = 1;
  optional int32 label = 2;

  // display_name may be omitted.
  optional string display_name = 3;
}
// A list of LabelMapItem entries.
message LabelMap {
  repeated LabelMapItem item = 1;
}
// Samples a bbox in the normalized space [0, 1] under the limits given here.
message Sampler {
  // Lower and upper bound on the scale of the sampled bbox.
  optional float min_scale = 1 [default = 1.0];
  optional float max_scale = 2 [default = 1.0];

  // Lower and upper bound on the aspect ratio of the sampled bbox.
  optional float min_aspect_ratio = 3 [default = 1.0];
  optional float max_aspect_ratio = 4 [default = 1.0];
}
// Constraints applied when selecting a sampled bbox.
message SampleConstraint {
  // Bounds on the Jaccard overlap between the sampled bbox and all bboxes in
  // the AnnotationGroup.
  optional float min_jaccard_overlap = 1;
  optional float max_jaccard_overlap = 2;

  // Bounds on the coverage of the sampled bbox by all bboxes in the
  // AnnotationGroup.
  optional float min_sample_coverage = 3;
  optional float max_sample_coverage = 4;

  // Bounds on the coverage of all bboxes in the AnnotationGroup by the
  // sampled bbox.
  optional float min_object_coverage = 5;
  optional float max_object_coverage = 6;
}
// Samples a batch of bboxes subject to the provided constraints.
message BatchSampler {
  // Use the original image as the source for sampling.
  optional bool use_original_image = 1 [default = true];

  // Constraints for sampling a bbox.
  optional Sampler sampler = 2;

  // Constraints that decide whether a sampled bbox is positive or negative.
  optional SampleConstraint sample_constraint = 3;

  // If provided, stop once this many samples satisfying sample_constraint
  // have been found.
  optional uint32 max_sample = 4;

  // Upper bound on the number of sampling trials, to avoid an infinite loop.
  optional uint32 max_trials = 5 [default = 100];
}
// Condition under which annotations are emitted.
message EmitConstraint {
  enum EmitType {
    CENTER = 0;
    MIN_OVERLAP = 1;
  }

  optional EmitType emit_type = 1 [default = CENTER];

  // Must be provided when emit_type is MIN_OVERLAP.
  optional float emit_overlap = 2;
}
// Bounding box normalized to [0, 1] with respect to the input image size.
message NormalizedBBox {
  optional float xmin = 1;
  optional float ymin = 2;
  optional float xmax = 3;
  optional float ymax = 4;
  optional int32 label = 5;
  optional bool difficult = 6;
  optional float score = 7;
  optional float size = 8;

  // Keypoint coordinates for this bbox, in normalized form [0-1].
  repeated float keypoint_x = 9;
  repeated float keypoint_y = 10;
}
// Annotation of a single object instance.
message Annotation {
  optional int32 instance_id = 1 [default = 0];
  optional NormalizedBBox bbox = 2;
}
// All annotations that belong to one particular label.
message AnnotationGroup {
  optional int32 group_label = 1;
  repeated Annotation annotation = 2;
}
// Extension of Datum that carries "rich" annotations.
message AnnotatedDatum {
  enum AnnotationType {
    BBOX = 0;
  }

  optional Datum datum = 1;

  // Type of the "rich" annotations, when there are any. Only bounding boxes
  // are currently supported. Without "rich" annotations, use the label stored
  // in `datum` instead.
  optional AnnotationType type = 2;

  // One group per class; each group holds the annotations for that class.
  repeated AnnotationGroup annotation_group = 3;

  // Unique record index assigned by the Reader.
  optional uint32 record_id = 4 [default = 0];
}
// Record kind tag.
// NOTE(review): presumably distinguishes Datum records from AnnotatedDatum
// records -- confirm against the reader code.
enum DatumTypeInfo {
  DatumTypeInfo_DATUM = 0;
  DatumTypeInfo_ANNOTATED_DATUM = 1;
}
// Tensor message for Caffe 2 datasets support.
message C2TensorProto {
  enum DataType {
    UNDEFINED = 0;
    FLOAT = 1;   // float
    INT32 = 2;   // int
    BYTE = 3;    // BYTE is restored as uint8 when deserialized.
    STRING = 4;  // string

    // Less-commonly used data types.
    BOOL = 5;      // bool
    UINT8 = 6;     // uint8_t
    INT8 = 7;      // int8_t
    UINT16 = 8;    // uint16_t
    INT16 = 9;     // int16_t
    INT64 = 10;    // int64_t
    FLOAT16 = 12;  // caffe2::__f16, caffe2::float16
    DOUBLE = 13;   // double
  }

  // Dimensions of the tensor.
  repeated int64 dims = 1;

  optional DataType data_type = 2 [default = FLOAT];

  // Container for float.
  repeated float float_data = 3 [packed = true];

  // Container for int32, uint8, int8, uint16, int16, bool and float16.
  // float16 values are converted byte-wise to unsigned short before being
  // stored in this field.
  repeated int32 int32_data = 4 [packed = true];

  // Container for bytes.
  optional bytes byte_data = 5;

  // Container for strings.
  repeated bytes string_data = 6;

  // Container for double.
  repeated double double_data = 9 [packed = true];

  // Container for int64.
  repeated int64 int64_data = 10 [packed = true];

  // Optional name of the tensor.
  optional string name = 7;
}
// A list of C2TensorProto messages.
message C2TensorProtos {
  repeated C2TensorProto protos = 1;
}
// Configuration of a filler.
message FillerParameter {
  // How the filler variance is normalized: by fan_in, by fan_out, or by their
  // average. Applies to the 'xavier' and 'msra' fillers.
  enum VarianceNorm {
    FAN_IN = 0;
    FAN_OUT = 1;
    AVERAGE = 2;
  }

  // The filler type, one of:
  //   constant, gaussian, positive_unitball, uniform, xavier, msra, bilinear,
  // or one of their static versions:
  //   gaussianstatic, positive_unitballstatic, uniformstatic, xavierstatic,
  //   msrastatic, bilinearstatic
  // A static version calls the random number generator only once; later
  // calls to fill a blob write the same random numbers as the first call.
  // This saves time where reusing the same random numbers makes no
  // difference.
  optional string type = 1 [default = "constant"];

  optional float value = 2 [default = 0];  // value of the constant filler
  optional float min = 3 [default = 0];    // min value of the uniform filler
  optional float max = 4 [default = 1];    // max value of the uniform filler
  optional float mean = 5 [default = 0];   // mean of the Gaussian filler
  optional float std = 6 [default = 1];    // std of the Gaussian filler

  // Expected number of non-zero output weights for a given input in the
  // Gaussian filler. The default, -1, means no sparsification is performed.
  optional int32 sparse = 7 [default = -1];

  optional VarianceNorm variance_norm = 8 [default = FAN_IN];
}
// Definition of a network: its layers plus net-wide settings.
message NetParameter {
  // Consider giving the network a name.
  optional string name = 1;

  // DEPRECATED. See InputParameter. The input blobs to the network.
  repeated string input = 3;
  // DEPRECATED. See InputParameter. The shape of the input blobs.
  repeated BlobShape input_shape = 8;

  // 4D input dimensions -- deprecated. Use "input_shape" instead.
  // When specified, each input blob needs four values (num, channels, height
  // and width), so there should be (4 * #input) numbers in total.
  repeated int32 input_dim = 4;

  // Whether every layer is forced to carry out the backward operation.
  // When false, whether to run backward is determined automatically from the
  // net structure and the learning rates.
  optional bool force_backward = 5 [default = false];

  // Current "state" of the network: phase, level and stage. Layers may be
  // included or excluded depending on this state and on the states listed in
  // their include and exclude fields.
  optional NetState state = 6;

  // Print debugging information about results while running Net::Forward,
  // Net::Backward and Net::Update.
  optional bool debug_info = 7 [default = false];

  // The layers that make up the net. The configuration of each one,
  // including connectivity and behavior, is given as a LayerParameter.
  // ID 100 so layers are printed last.
  repeated LayerParameter layer = 100;

  // DEPRECATED: use 'layer' instead.
  repeated V1LayerParameter layers = 2;

  // Default types for all layers. They take effect only when the
  // layer-specific ones are omitted.
  optional Type default_forward_type = 11 [default = FLOAT];
  optional Type default_backward_type = 12 [default = FLOAT];
  optional Type default_forward_math = 13 [default = FLOAT];
  optional Type default_backward_math = 14 [default = FLOAT];

  // Global gradient scaling coefficient K (default: no scaling).
  //
  // Scenario 1: global_grad_scale_adaptive: true
  //   If positive, gradients are scaled by K*L, where L is the L_2 norm of
  //   all gradients in a Net. This helps to improve accuracy of reduced
  //   precision training.
  //
  // Scenario 2: global_grad_scale_adaptive: false
  //   If positive, gradients are scaled by K.
  optional float global_grad_scale = 15 [default = 1.0];
  optional bool global_grad_scale_adaptive = 16 [default = false];

  // Default "conv_algos_override" value for every convolution layer.
  optional string default_conv_algos_override = 17 [default = "-1,-1,-1"];

  // With multiple GPUs a reduction has to run after every iteration. For
  // better performance, multiple layers are unified into buckets; this sets
  // the approximate number of buckets layers are combined into. The default
  // is good for the majority of nets.
  optional int32 reduce_buckets = 18 [default = 3];

  // Default "cudnn_math_override" value for every layer.
  optional int32 default_cudnn_math_override = 19 [default = -1];

  optional bool quantize = 200 [default = false];
  optional NetQuantizationParameter net_quantization_param = 201;
}
// NOTE
// Update the next available ID when you add a new SolverParameter field.
//
// SolverParameter next available ID: 55 (last added:
// test_and_snapshot_last_epochs). IDs 150-161 and 241-244 are also in use.
message SolverParameter {
  ////////////////////////////////////////////////////////////////////////////
  // Specifying the train and test networks
  //
  // Exactly one train net must be specified using one of these fields:
  //     train_net_param, train_net, net_param, net
  // One or more test nets may be specified using any of these fields:
  //     test_net_param, test_net, net_param, net
  // If more than one test net field is specified (e.g., both net and
  // test_net), they are evaluated in the field order given above:
  // (1) test_net_param, (2) test_net, (3) net_param/net.
  // A test_iter must be specified for each test_net.
  // A test_level and/or a test_stage may also be specified for each test_net.
  ////////////////////////////////////////////////////////////////////////////

  // Proto filename for the train net, possibly combined with one or more
  // test nets.
  optional string net = 24;
  // Inline train net param, possibly combined with one or more test nets.
  optional NetParameter net_param = 25;

  // Proto filename for the train net.
  optional string train_net = 1;
  // Proto filenames for the test nets.
  repeated string test_net = 2;
  // Inline train net params.
  optional NetParameter train_net_param = 21;
  // Inline test net params.
  repeated NetParameter test_net_param = 22;

  // States for the train/test nets. Must be either unspecified or specified
  // once per net.
  //
  // By default, all states will have solver = true;
  // train_state will have phase = TRAIN,
  // and all test_state's will have phase = TEST.
  // Other defaults are set according to the NetState defaults.
  optional NetState train_state = 26;
  repeated NetState test_state = 27;

  // Evaluation type.
  optional string eval_type = 241 [default = "classification"];
  // ap_version: different ways of computing Average Precision.
  //    Check https://sanchom.wordpress.com/tag/average-precision/ for details.
  //    11point: the 11-point interpolated average precision. Used in VOC2007.
  //    MaxIntegral: maximally interpolated AP. Used in VOC2012/ILSVRC.
  //    Integral: the natural integral of the precision-recall curve.
  optional string ap_version = 242 [default = "Integral"];
  // If true, display per class result.
  optional bool show_per_class_result = 244 [default = false];

  // Number of iterations for each test net.
  repeated int32 test_iter = 3;

  // Number of iterations between two testing phases.
  optional int32 test_interval = 4 [default = 0];
  optional bool test_compute_loss = 19 [default = false];
  // If true, run an initial test pass before the first iteration, ensuring
  // memory availability and printing the starting value of the loss.
  optional bool test_initialization = 32 [default = false];

  optional int32 rampup_interval = 41 [default = 0];
  optional float rampup_lr = 42 [default = 0.0];
  optional float min_lr = 43 [default = 0.0];

  // The base learning rate.
  optional float base_lr = 5;

  // Number of iterations between displaying info. With display = 0, no info
  // is displayed.
  optional int32 display = 6;
  // Display the loss averaged over the last average_loss iterations.
  optional int32 average_loss = 33 [default = 1];
  // The maximum number of iterations.
  optional int32 max_iter = 7;
  // Accumulate gradients over `iter_size` x `batch_size` instances.
  optional int32 iter_size = 36 [default = 1];

  // The learning rate decay policy. The currently implemented learning rate
  // policies are as follows:
  //    - fixed: always return base_lr.
  //    - step: return base_lr * gamma ^ (floor(iter / step))
  //    - exp: return base_lr * gamma ^ iter
  //    - inv: return base_lr * (1 + gamma * iter) ^ (- power)
  //    - multistep: similar to step, but allows non-uniform steps defined by
  //      stepvalue
  //    - poly: the effective learning rate follows a polynomial decay, to be
  //      zero by the max_iter. return base_lr (1 - iter/max_iter) ^ (power)
  //    - sigmoid: the effective learning rate follows a sigmoid decay
  //      return base_lr ( 1/(1 + exp(-gamma * (iter - stepsize))))
  //    - plateau: decreases lr
  //      if the minimum loss isn't updated for 'plateau_winsize' iters
  //
  // where base_lr, max_iter, gamma, step, stepvalue and power are defined
  // in the solver parameter protocol buffer, and iter is the current
  // iteration.
  optional string lr_policy = 8;
  // Parameter used to compute the learning rate.
  optional float gamma = 9;
  // Parameter used to compute the learning rate.
  optional float power = 10;
  // The momentum value.
  optional float momentum = 11;

  optional string momentum_policy = 46 [default = "fixed"];
  optional float max_momentum = 47 [default = 0.99];
  optional float momentum_power = 48 [default = 1.0];

  // LARC - Layer-wise Adaptive Rate Control
  optional bool larc = 49 [default = false];
  optional string larc_policy = 50 [default = "scale"];
  optional float larc_eta = 51 [default = 0.001];

  // The weight decay.
  optional float weight_decay = 12;
  optional string weight_decay_policy = 52 [default = "fixed"];
  optional float weight_decay_power = 53 [default = 0.5];

  // Regularization types supported: L1 and L2; controlled by weight_decay.
  optional string regularization_type = 29 [default = "L2"];
  // The stepsize for learning rate policy "step".
  optional int32 stepsize = 13;
  // The stepsize for learning rate policy "multistep".
  repeated int32 stepvalue = 34;
  // The stepsize for learning rate policy "plateau".
  repeated int32 plateau_winsize = 243;

  // Set clip_gradients to >= 0 to clip parameter gradients to that L2 norm
  // whenever their actual L2 norm is larger.
  optional float clip_gradients = 35 [default = -1];

  // The snapshot interval.
  optional int32 snapshot = 14 [default = 0];
  // The prefix for the snapshot.
  optional string snapshot_prefix = 15;
  // Whether to snapshot diff in the results. Snapshotting diff helps with
  // debugging, but the final protocol buffer size will be much larger.
  optional bool snapshot_diff = 16 [default = false];
  enum SnapshotFormat {
    HDF5 = 0;
    BINARYPROTO = 1;
  }
  optional SnapshotFormat snapshot_format = 37 [default = BINARYPROTO];

  // The mode the solver will use: 0 for CPU and 1 for GPU. GPU by default.
  enum SolverMode {
    CPU = 0;
    GPU = 1;
  }
  optional SolverMode solver_mode = 17 [default = GPU];
  // The device_id that will be used in GPU mode. device_id = 0 by default.
  optional int32 device_id = 18 [default = 0];
  // If non-negative, the seed with which the Solver will initialize the Caffe
  // random number generator -- useful for reproducible results. Otherwise
  // (and by default), initialize using a seed derived from the system clock.
  optional int64 random_seed = 20 [default = -1];

  // Type of the solver.
  optional string type = 40 [default = "SGD"];

  // Numerical stability for RMSProp, AdaGrad, AdaDelta and Adam.
  optional float delta = 31 [default = 1e-8];
  // Parameter for the Adam solver.
  optional float momentum2 = 39 [default = 0.999];

  // RMSProp decay value:
  // MeanSquare(t) = rms_decay*MeanSquare(t-1) + (1-rms_decay)*SquareGradient(t)
  optional float rms_decay = 38 [default = 0.99];

  // If true, print information about the state of the net that may help with
  // debugging learning problems.
  optional bool debug_info = 23 [default = false];

  // If false, don't save a snapshot after training finishes.
  optional bool snapshot_after_train = 28 [default = true];

  // DEPRECATED: old solver enum types, use string instead.
  enum SolverType {
    SGD = 0;
    NESTEROV = 1;
    ADAGRAD = 2;
    RMSPROP = 3;
    ADADELTA = 4;
    ADAM = 5;
  }
  // DEPRECATED: use type instead of solver_type.
  optional SolverType solver_type = 30 [default = SGD];

  // Type used for storing weights and history.
  optional Type solver_data_type = 44 [default = FLOAT];

  // If true:
  //   * Stores blobs in old (less efficient) BVLC-compatible format.
  //   * FP16 blobs are converted to FP32 and stored in 'data' container.
  //   * FP32 blobs are stored in 'data' container.
  //   * FP64 blobs are stored in 'double_data' container.
  optional bool store_blobs_in_old_format = 45 [default = false];

  // If set to N>0, makes Caffe test and snapshot the last N epochs.
  optional int32 test_and_snapshot_last_epochs = 54 [default = 0];

  // Ignore mismatching blobs and continue while loading weights.
  optional bool ignore_shape_mismatch = 150 [default = true];

  // Write an additional snapshot in txt format.
  optional bool snapshot_log = 151 [default = false];

  // Sparsity params.
  optional int32 display_sparsity = 152 [default = 0];
  optional SparseMode sparse_mode = 153 [default = SPARSE_NONE];
  // Desired sparsity as a fraction.
  optional float sparsity_target = 154 [default = 0.0];
  // Sparsity step factor. 0.01 is 1%.
  optional float sparsity_step_factor = 155 [default = 0.01];
  // Sparsity step increment iterations.
  optional int32 sparsity_step_iter = 156 [default = 1000];
  // Sparsity start iteration.
  optional int32 sparsity_start_iter = 157 [default = 0];
  // Sparsity start factor.
  optional float sparsity_start_factor = 158 [default = 0.0];
  // Default ratio of the max threshold allowed to the max weight.
  optional float sparsity_threshold_maxratio = 159 [default = 0.2];
  // true: iteration increment before applying sparsity (old behaviour);
  // false: after applying sparsity.
  optional bool sparsity_itr_increment_bfr_applying = 160 [default = true];
  // threshold_value_max
  optional float sparsity_threshold_value_max = 161 [default = 0.2];
}
// Message in which solver snapshots are stored.
message SolverState {
  // The current iteration.
  optional int32 iter = 1;
  // The file that stores the learned net.
  optional string learned_net = 2;
  // The history for sgd solvers.
  repeated BlobProto history = 3;
  // The current step for the learning rate.
  optional int32 current_step = 4 [default = 0];
  // Historical minimum loss.
  optional float minimum_loss = 5 [default = 1e38];
  // The iteration at which the last lr-update or min_loss-update happened.
  optional int32 iter_last_event = 6 [default = 0];
}
// Phase of computation: training or testing.
enum Phase {
  TRAIN = 0;
  TEST = 1;
}
// State of a network: its phase, its level and its list of stages.
message NetState {
  optional Phase phase = 1 [default = TEST];
  optional int32 level = 2 [default = 0];
  repeated string stage = 3;
}
// A rule that a NetState either meets or does not meet.
message NetStateRule {
  // Set phase to require that the NetState has a particular phase (TRAIN or
  // TEST) in order to meet this rule.
  optional Phase phase = 1;

  // Minimum and/or maximum level in which the layer should be used.
  // Leave undefined to meet the rule regardless of level.
  optional int32 min_level = 2;
  optional int32 max_level = 3;

  // Customizable sets of stages to include or exclude.
  // To meet the rule, the net must have ALL of the specified stages and NONE
  // of the specified "not_stage"s.
  // (Use multiple NetStateRules to specify conjunctions of stages.)
  repeated string stage = 4;
  repeated string not_stage = 5;
}
// Training parameters for one parameter blob: multipliers on the global
// learning constants, plus the name and other settings used for weight
// sharing.
message ParamSpec {
  enum DimCheckMode {
    // STRICT (default) requires that num, channels, height, width each match.
    STRICT = 0;
    // PERMISSIVE requires only the count (num*channels*height*width) to
    // match.
    PERMISSIVE = 1;
  }

  // Name of the parameter blob. Useful for sharing parameters among layers,
  // but never required otherwise. To share a parameter between two layers,
  // give it a (non-empty) name.
  optional string name = 1;

  // Whether shared weights must have the same shape or just the same count.
  // Defaults to STRICT if unspecified.
  optional DimCheckMode share_mode = 2;

  // Multiplier on the global learning rate for this parameter.
  optional float lr_mult = 3 [default = 1.0];

  // Multiplier on the global weight decay for this parameter.
  optional float decay_mult = 4 [default = 1.0];
}
// NOTE
// Update the next available ID when you add a new LayerParameter field.
//
// LayerParameter next available layer-specific ID: 152 (last added:
// recurrent_param)
message LayerParameter {
  // The layer name.
  optional string name = 1;
  // The layer type.
  optional string type = 2;
  // The name of each bottom blob.
  repeated string bottom = 3;
  // The name of each top blob.
  repeated string top = 4;

  // Type returned by Forward routines of a particular layer (aka 'Ftype').
  optional Type forward_type = 145 [default = FLOAT];
  // Type returned by Backward routines of a particular layer (aka 'Btype').
  optional Type backward_type = 146 [default = FLOAT];
  // Internal math types. They apply to layers whose internal math type can
  // differ from Ftype or Btype, for example the so-called "pseudo fp32 mode"
  // in convolution layers. They have no meaning for other layers.
  optional Type forward_math = 147 [default = FLOAT];
  optional Type backward_math = 148 [default = FLOAT];

  optional bool debug = 149 [default = false];

  // Sets the default cudnnMathType_t value for all cuDNN-based computations
  // in the current layer, if applicable. Ignored otherwise.
  // If negative or omitted, assumes the implicit default and allows
  // optimizers like cudnnFindConvolution*AlgorithmEx to choose the best type.
  // If zero, enforces CUDNN_DEFAULT_MATH everywhere in the current layer.
  // If one, enforces CUDNN_TENSOR_OP_MATH everywhere in the current layer.
  optional int32 cudnn_math_override = 150 [default = -1];

  // The train / test phase for computation.
  optional Phase phase = 10;

  // The amount of weight to assign each top blob in the objective.
  // Each layer assigns a default value, usually of either 0 or 1, to each
  // top blob.
  repeated float loss_weight = 5;

  // Training parameters (multipliers on global learning constants, and the
  // name and other settings used for weight sharing).
  repeated ParamSpec param = 6;

  // The blobs containing the numeric parameters of the layer.
  repeated BlobProto blobs = 7;

  // Whether to backpropagate to each bottom. If unspecified, Caffe infers
  // automatically whether each input needs backpropagation to compute
  // parameter gradients. If set to true for some inputs, backpropagation to
  // those inputs is forced; if set to false for some inputs, backpropagation
  // to those inputs is skipped.
  //
  // The size must be either 0 or equal to the number of bottoms.
  repeated bool propagate_down = 11;

  // Rules controlling whether and when a layer is included in the network,
  // based on the current NetState. You may specify a non-zero number of
  // rules to include OR exclude, but not both. If no include or exclude
  // rules are specified, the layer is always included. If the current
  // NetState meets ANY (i.e., one or more) of the specified rules, the layer
  // is included/excluded.
  repeated NetStateRule include = 8;
  repeated NetStateRule exclude = 9;

  // Parameters for data pre-processing.
  optional TransformationParameter transform_param = 100;

  // Parameters shared by loss layers.
  optional LossParameter loss_param = 101;

  // Layer type-specific parameters.
  //
  // Note: certain layers may have more than one computational engine for
  // their implementation. These layers include an Engine type and engine
  // parameter for selecting the implementation. The default for the engine
  // is set by the ENGINE switch at compile-time.
  optional AccuracyParameter accuracy_param = 102;
  optional AnnotatedDataParameter annotated_data_param = 200;
  optional ArgMaxParameter argmax_param = 103;
  optional BatchNormParameter batch_norm_param = 139;
  optional BiasParameter bias_param = 141;
  optional ConcatParameter concat_param = 104;
  optional ContrastiveLossParameter contrastive_loss_param = 105;
  optional ConvolutionParameter convolution_param = 106;
  optional CropParameter crop_param = 144;
  optional DataParameter data_param = 107;
  optional DetectionEvaluateParameter detection_evaluate_param = 205;
  optional DetectionOutputParameter detection_output_param = 204;
  optional DropoutParameter dropout_param = 108;
  optional DummyDataParameter dummy_data_param = 109;
  optional EltwiseParameter eltwise_param = 110;
  optional ELUParameter elu_param = 140;
  optional EmbedParameter embed_param = 137;
  optional ExpParameter exp_param = 111;
  optional FlattenParameter flatten_param = 135;
  optional HDF5DataParameter hdf5_data_param = 112;
  optional HDF5OutputParameter hdf5_output_param = 113;
  optional HingeLossParameter hinge_loss_param = 114;
  optional ImageDataParameter image_data_param = 115;
  optional InfogainLossParameter infogain_loss_param = 116;
  optional InnerProductParameter inner_product_param = 117;
  optional InputParameter input_param = 143;
  optional LogParameter log_param = 134;
  optional LRNParameter lrn_param = 118;
  optional MemoryDataParameter memory_data_param = 119;
  optional MultiBoxLossParameter multibox_loss_param = 201;
  optional MVNParameter mvn_param = 120;
  optional NormalizeParameter norm_param = 206;
  optional PermuteParameter permute_param = 202;
  optional PoolingParameter pooling_param = 121;
  optional PowerParameter power_param = 122;
  optional PReLUParameter prelu_param = 131;
  optional PriorBoxParameter prior_box_param = 203;
  optional PythonParameter python_param = 130;
  optional ReductionParameter reduction_param = 136;
  optional ReLUParameter relu_param = 123;
  optional ReshapeParameter reshape_param = 133;
  optional ScaleParameter scale_param = 142;
  optional SigmoidParameter sigmoid_param = 124;
  optional SoftmaxParameter softmax_param = 125;
  optional SPPParameter spp_param = 132;
  optional SliceParameter slice_param = 126;
  optional TanHParameter tanh_param = 127;
  optional ThresholdParameter threshold_param = 128;
  optional TileParameter tile_param = 138;
  optional VideoDataParameter video_data_param = 207;
  optional WindowDataParameter window_data_param = 129;
  optional RecurrentParameter recurrent_param = 151;

  // NVIDIA PARAMETERS (Start with 68 because NV is 68 on an old-style phone)
  optional DetectNetGroundTruthParameter detectnet_groundtruth_param = 6801;
  optional DetectNetAugmentationParameter detectnet_augmentation_param = 6802;

  // TI PARAMETERS (Start with 84 because TI is 84 on an old-style phone)
  optional ImageLabelDataParameter image_label_data_param = 8403;
  optional QuantizationParameter quantization_param = 8404;
}
// Settings for the transformations a data layer applies to its data.
message TransformationParameter {
  // Interpolation algorithm choices.
  enum InterpolationAlgo {
    // Nearest neighbor interpolation.
    INTER_NEAREST = 0;
    // Bilinear interpolation.
    INTER_LINEAR = 1;
    // Bicubic interpolation.
    INTER_CUBIC = 2;
    // Area-based (or super) interpolation.
    INTER_AREA = 3;
  }

  // Variable-sized image transforms
  // -------------------------------
  // Downstream operations in caffe require every image in a batch to have the
  // same shape, so images of differing shapes must first be preprocessed into
  // one fixed shape. This is done with a sequence of "variable-sized image
  // transforms". Three transforms exist, and _all of them_ may be enabled at
  // the same time. They are always applied in the same order:
  //   (1) random resize, (2) random crop, (3) center crop.
  // The last transform must be either a random crop or a center crop.
  //
  // 1. Random resize. Takes two parameters, "lower" (L) and "upper" (U).
  //    For an original image of shape (oldW, oldH), the shorter side
  //    D = min(oldW, oldH) is calculated. A resize target size R is then
  //    chosen uniformly from the interval [L, U], and both sides are scaled
  //    by the factor R/D, yielding an image of shape
  //    (R/D * oldW, R/D * oldH).
  //
  // 2. Random crop. Takes one 'crop_size' parameter. A square region is
  //    randomly chosen from the image. Works in TRAIN phase only.
  //
  // 3. Center crop. Takes one 'crop_size' parameter. A square region is
  //    chosen from the center of the image. Works in TEST phase only.
  optional uint32 img_rand_resize_lower = 10 [default = 0];
  optional uint32 img_rand_resize_upper = 11 [default = 0];

  // Bounds for a randomly generated ratio R; the longer side length is set
  // to the shorter side length multiplied by R. When applied to a square,
  // the shorter side is chosen randomly. The pair {1,1} means "resize image
  // to square (by shortest side)". Values less than 1 are ignored.
  optional float rand_resize_ratio_lower = 12 [default = 0];
  optional float rand_resize_ratio_upper = 13 [default = 0];

  // Bounds for a randomly generated vertical stretch, i.e.
  //   "height" *= "vertical_stretch", where
  //   "vertical_stretch" =
  //       Rand(vertical_stretch_lower, vertical_stretch_upper).
  // The pair {1,1} means "do nothing".
  optional float vertical_stretch_lower = 14 [default = 1];
  optional float vertical_stretch_upper = 15 [default = 1];

  // Bounds for a randomly generated horizontal stretch, i.e.
  //   "width" *= "horizontal_stretch", where
  //   "horizontal_stretch" =
  //       Rand(horizontal_stretch_lower, horizontal_stretch_upper).
  // The pair {1,1} means "do nothing".
  optional float horizontal_stretch_lower = 16 [default = 1];
  optional float horizontal_stretch_upper = 17 [default = 1];

  // OpenCV algorithm used for downsampling.
  optional InterpolationAlgo interpolation_algo_down = 18
      [default = INTER_NEAREST];
  // OpenCV algorithm used for upsampling.
  optional InterpolationAlgo interpolation_algo_up = 19
      [default = INTER_CUBIC];
  // Upscaling is off by default, whatever resize parameters are chosen.
  optional bool allow_upscale = 20 [default = false];

  // Set to NHWC for better performance when followed by CuDNN.
  optional Packing forward_packing = 21 [default = NCHW];

  // Simple data pre-processing: scaling and, if a mean is provided,
  // subtracting the data mean. Mean subtraction is always carried out
  // before scaling.
  optional float scale = 1 [default = 1];
  // Whether to randomly mirror the data.
  optional bool mirror = 2 [default = false];
  // Whether (and how large) to randomly crop an image.
  optional uint32 crop_size = 3 [default = 0];
  optional uint32 crop_h = 211 [default = 0];
  optional uint32 crop_w = 212 [default = 0];

  // mean_file and mean_value cannot be specified at the same time.
  optional string mean_file = 4;
  // May be given once (it is then subtracted from all the channels) or as
  // many times as there are channels (each value is then subtracted from
  // the corresponding channel).
  repeated float mean_value = 5;
  // Force the decoded image to have 3 color channels.
  optional bool force_color = 6 [default = false];
  // Force the decoded image to have 1 color channel.
  optional bool force_gray = 7 [default = false];
  // Run the transform (synchronously) on the GPU.
  // Per the original notes: treated as false if omitted when the Forward
  // Type is float/double, and as true otherwise (float16 doesn't work well
  // on CPU).
  optional bool use_gpu_transform = 8 [default = false];
  // If non-negative, the seed used to initialize the transformer's random
  // number generator -- useful for reproducible results. Otherwise (and by
  // default) the seed is derived from the system clock.
  optional int64 random_seed = 9 [default = -1];

  optional bool display = 22 [default = false];
  optional int32 num_labels = 23 [default = 0];

  // Resize policy.
  optional ResizeParameter resize_param = 208;
  // Noise policy.
  optional NoiseParameter noise_param = 209;
  // Distortion policy.
  optional DistortionParameter distort_param = 213;
  // Expand policy.
  optional ExpansionParameter expand_param = 214;
  // Constraint for emitting the annotation after transformation.
  optional EmitConstraint emit_constraint = 210;
}
// Settings used to build the gridbox ground truth.
message DetectNetGroundTruthParameter {
  // Selects whether the coverage region is bounded from above or below.
  enum GridboxType {
    GRIDBOX_MAX = 0;
    GRIDBOX_MIN = 1;
  }

  // Available coverage geometries.
  enum CoverageType {
    RECTANGULAR = 0;
  }

  // One entry of the class remapping: source class -> target index.
  message ClassMapping {
    required uint32 src = 1;
    required uint32 dst = 2;
  }

  // Stride of the gridbox with respect to image size.
  optional uint32 stride = 1 [default = 4];
  // Scale of the coverage region with respect to bounding box size.
  optional float scale_cvg = 2 [default = 0.5];

  // Determines the coverage region's maximum and minimum dimensions.
  optional GridboxType gridbox_type = 3 [default = GRIDBOX_MAX];
  // When gridbox_type equals GRIDBOX_MAX: the maximum size a given coverage
  // region may take.
  optional uint32 max_cvg_len = 4 [default = 50];
  // When gridbox_type equals GRIDBOX_MIN: the minimum size a given coverage
  // region may take.
  optional uint32 min_cvg_len = 5 [default = 50];

  // Shape of the coverage geometry.
  optional CoverageType coverage_type = 7 [default = RECTANGULAR];

  // Size that incoming images are cropped / scaled to during training /
  // test time. The network will only see images of this size.
  optional uint32 image_size_x = 8 [default = 1248];
  optional uint32 image_size_y = 9 [default = 384];

  // Normalize object coverage by the size of the object.
  // NOTE(review): the original comment also carried an unfinished sentence,
  // "coverage proportional to the size of the gridbox region overlapping
  // the" -- it may describe a field that is no longer present; confirm.
  optional bool obj_norm = 11 [default = false];
  // Crop incoming bboxes so that their bounds remain inside the gridbox.
  optional bool crop_bboxes = 12 [default = true];

  // Integer and target index of the classes to be included.
  repeated ClassMapping object_class = 13;
}
// Settings for the image and label augmentations specific to NVDataLayer's
// online augmentation module.
message DetectNetAugmentationParameter {
  // --- Cropping and shifting ---

  // Probability that a random crop of the training image will be taken at
  // test time. If the image dimensions are less than the input size and
  // random cropping is off, the image is scaled deterministically to the
  // input size.
  optional float crop_prob = 1 [default = 1.0];
  // Number of pixels to shift the image by. If cropping is enabled, this
  // number is added to the range of possible crop offset values.
  optional uint32 shift_x = 2 [default = 0];
  optional uint32 shift_y = 3 [default = 0];

  // --- Scaling ---

  // Probability that the training image will be scaled at test time.
  // 0 turns scale augmentation off.
  optional float scale_prob = 4 [default = 0.33];
  // Minimum and maximum scaling factor. 1.0 implies no scaling.
  optional float scale_min = 5 [default = 0.7];
  optional float scale_max = 6 [default = 1.0];

  // --- Flipping ---

  // Probability that the image will be flipped across the Y axis.
  // 0 turns flip augmentation off.
  optional float flip_prob = 7 [default = 0.33];

  // --- Rotation ---

  // The original comment for this pair reads: "maximum angle in degrees (in
  // both directions) image may be rotated. 0 turns rotation augmentation
  // off." That text describes max_rotate_degree; rotation_prob is
  // presumably the probability of applying rotation, by analogy with the
  // other *_prob fields -- TODO confirm.
  optional float rotation_prob = 8 [default = 0.33];
  optional float max_rotate_degree = 9 [default = 1.0];

  // --- Hue ---

  // hue_rotation: maximum rotation of hue in degrees (in both directions).
  // 0 turns hue augmentation off.
  optional float hue_rotation_prob = 10 [default = 0.33];
  optional float hue_rotation = 11 [default = 15];

  // --- Desaturation ---

  // desaturation_max: maximum desaturation parameter. 1.0 may convert RGB
  // to luminance. 0 turns desaturation augmentation off.
  optional float desaturation_prob = 12 [default = 0.33];
  optional float desaturation_max = 13 [default = 0.5];

  // --- Shared transformation policies ---

  // Resize policy.
  optional ResizeParameter resize_param = 208;
  // Noise policy.
  optional NoiseParameter noise_param = 209;
  // Distortion policy.
  optional DistortionParameter distort_param = 213;
  // Expand policy.
  optional ExpansionParameter expand_param = 214;
  // Constraint for emitting the annotation after transformation.
  optional EmitConstraint emit_constraint = 210;
}
// Resize policy settings consumed by the data transformer.
message ResizeParameter {
  // Available resize strategies.
  enum Resize_mode {
    WARP = 1;
    FIT_SMALL_SIZE = 2;
    FIT_LARGE_SIZE_AND_PAD = 3;
  }

  // Available padding strategies.
  enum Pad_mode {
    CONSTANT = 1;
    MIRRORED = 2;
    REPEAT_NEAREST = 3;
  }

  // Available interpolation methods (same as in OpenCV).
  enum Interp_mode {
    LINEAR = 1;
    AREA = 2;
    NEAREST = 3;
    CUBIC = 4;
    LANCZOS4 = 5;
  }

  // Probability of using this resize policy.
  optional float prob = 1 [default = 1];

  optional Resize_mode resize_mode = 2 [default = WARP];
  optional uint32 height = 3 [default = 0];
  optional uint32 width = 4 [default = 0];
  // Parameters used to update the bbox in FIT_SMALL_SIZE mode.
  optional uint32 height_scale = 8 [default = 0];
  optional uint32 width_scale = 9 [default = 0];

  // Padding mode for the padded resize mode (FIT_LARGE_SIZE_AND_PAD) and
  // for object centering.
  optional Pad_mode pad_mode = 5 [default = CONSTANT];
  // May be given once (it then fills all the channels) or as many times as
  // there are channels (each value is then used for the corresponding
  // channel).
  repeated float pad_value = 6;

  // Interpolation used for resizing.
  repeated Interp_mode interp_mode = 7;
}
// Settings for salt-and-pepper noise (referenced by NoiseParameter).
message SaltPepperParameter {
  // Percentage of pixels.
  optional float fraction = 1 [default = 0];

  repeated float value = 2;
}
// Noise (transformation) policy settings consumed by the data transformer.
message NoiseParameter {
  // Probability of using this policy.
  optional float prob = 1 [default = 0];

  optional bool hist_eq = 2 [default = false];     // Histogram equalized
  optional bool inverse = 3 [default = false];     // Color inversion
  optional bool decolorize = 4 [default = false];  // Grayscale
  optional bool gauss_blur = 5 [default = false];  // Gaussian blur

  // JPEG compression quality (-1 = no compression).
  optional float jpeg = 6 [default = -1];

  optional bool posterize = 7 [default = false];   // Posterization
  optional bool erode = 8 [default = false];       // Erosion

  // Salt-and-pepper noise switch, with its settings in saltpepper_param.
  optional bool saltpepper = 9 [default = false];
  optional SaltPepperParameter saltpepper_param = 10;

  // Local histogram equalization.
  optional bool clahe = 11 [default = false];

  // Color space conversions.
  optional bool convert_to_hsv = 12 [default = false];
  optional bool convert_to_lab = 13 [default = false];
}
// Distortion policy settings consumed by the data transformer.
message DistortionParameter {
  // --- Brightness ---
  // Probability of adjusting brightness.
  optional float brightness_prob = 1 [default = 0.0];
  // Amount added to the pixel values, drawn from [-delta, delta].
  // Possible values lie within [0, 255]. Recommend 32.
  optional float brightness_delta = 2 [default = 0.0];

  // --- Contrast ---
  // Probability of adjusting contrast.
  optional float contrast_prob = 3 [default = 0.0];
  // Lower bound of the random contrast factor. Recommend 0.5.
  optional float contrast_lower = 4 [default = 0.0];
  // Upper bound of the random contrast factor. Recommend 1.5.
  optional float contrast_upper = 5 [default = 0.0];

  // --- Hue ---
  // Probability of adjusting hue.
  optional float hue_prob = 6 [default = 0.0];
  // Amount added to the hue channel, drawn from [-delta, delta].
  // Possible values lie within [0, 180]. Recommend 36.
  optional float hue_delta = 7 [default = 0.0];

  // --- Saturation ---
  // Probability of adjusting saturation.
  optional float saturation_prob = 8 [default = 0.0];
  // Lower bound of the random saturation factor. Recommend 0.5.
  optional float saturation_lower = 9 [default = 0.0];
  // Upper bound of the random saturation factor. Recommend 1.5.
  optional float saturation_upper = 10 [default = 0.0];

  // --- Channel order ---
  // Probability of randomly reordering the image channels.
  optional float random_order_prob = 11 [default = 0.0];
}
// Expansion policy settings consumed by the data transformer.
message ExpansionParameter {
  optional float prob = 1 [default = 1];  // Probability of using this policy
  optional float max_expand_ratio = 2 [default = 1.];  // Image expand ratio
}
// Message that stores parameters shared by loss layers.
message LossParameter {
  // How to normalize the loss for loss layers that aggregate across batches,
  // spatial dimensions, or other dimensions. Currently only implemented in
  // SoftmaxWithLoss layer.
  enum NormalizationMode {
    // Divide by the number of examples in the batch times spatial dimensions.
    // Outputs that receive the ignore label will NOT be ignored in computing
    // the normalization factor.
    FULL = 0;
    // Divide by the total number of output locations that do not take the
    // ignore_label. If ignore_label is not set, this behaves like FULL.
    VALID = 1;
    // Divide by the batch size.
    BATCH_SIZE = 2;
    // Do not normalize the loss.
    NONE = 3;
  }

  // If specified, ignore instances with the given label.
  optional int32 ignore_label = 1;

  optional NormalizationMode normalization = 3 [default = VALID];

  // Deprecated: use "normalization" instead. Ignored if normalization is
  // specified. If normalization is not specified, then setting this to false
  // will be equivalent to normalization = BATCH_SIZE to be consistent with
  // previous behavior.
  // The deprecated option only annotates the field; its number, type and
  // wire encoding are unchanged, so existing models still parse.
  optional bool normalize = 2 [deprecated = true];

  // Label weights.
  repeated float label_weights = 4;
}
1093 // Messages that store parameters used by individual layer types follow, in
1094 // alphabetical order.
// Settings for computing accuracy.
message AccuracyParameter {
  // A prediction counts as correct when the true label is among the top k
  // scoring classes. The default compares only against the single top
  // scoring class (i.e. argmax).
  optional uint32 top_k = 1 [default = 1];

  // The "label" axis of the prediction blob; its argmax corresponds to the
  // predicted label. May be negative to index from the end (e.g., -1 for
  // the last axis). For example, with axis == 1 and predictions of shape
  // (N x C x H x W), the label blob is expected to hold N*H*W ground truth
  // labels with integer values in {0, 1, ..., C-1}.
  optional int32 axis = 2 [default = 1];

  // When set, instances with this label are ignored.
  optional int32 ignore_label = 3;
}
// Settings for annotated data input.
message AnnotatedDataParameter {
  // Defines the sampler.
  repeated BatchSampler batch_sampler = 1;

  // File storing label name and label id in LabelMap format.
  optional string label_map_file = 2;

  // When provided, replaces the AnnotationType stored in each
  // AnnotatedDatum.
  optional AnnotatedDatum.AnnotationType anno_type = 3;
}
// Settings for ArgMaxLayer.
message ArgMaxParameter {
  // When true, pairs (argmax, maxval) are produced.
  optional bool out_max_val = 1 [default = false];

  optional uint32 top_k = 2 [default = 1];

  // Axis along which to maximise. May be negative to index from the end
  // (e.g., -1 for the last axis). When left unset, ArgMaxLayer maximizes
  // over the flattened trailing dimensions for each index of the
  // first / num dimension.
  optional int32 axis = 3;
}
// Parameters for ConcatLayer.
message ConcatParameter {
  // The axis along which to concatenate -- may be negative to index from the
  // end (e.g., -1 for the last axis). Other axes must have the
  // same dimension for all the bottom blobs.
  // By default, ConcatLayer concatenates blobs along the "channels" axis (1).
  optional int32 axis = 2 [default = 1];

  // DEPRECATED: alias for "axis" -- does not support negative indexing.
  // The deprecated option only annotates the field; its number, type,
  // default and wire encoding are unchanged, so existing models still parse.
  optional uint32 concat_dim = 1 [default = 1, deprecated = true];
}
// Settings for batch normalization.
message BatchNormParameter {
  // Implementation selector.
  enum Engine {
    DEFAULT = 0;
    CAFFE = 1;
    CUDNN = 2;
  }

  // false: accumulate global mean/variance values via a moving average.
  // true: use those accumulated values instead of computing mean/variance
  // across the batch.
  optional bool use_global_stats = 1 [default = false];

  // How much the moving average decays each iteration.
  optional float moving_average_fraction = 2 [default = .999];

  // Small value added to the variance estimate so that we don't divide by
  // zero.
  optional float eps = 3 [default = 1e-5];

  optional FillerParameter scale_filler = 5;
  optional FillerParameter bias_filler = 6;
  optional bool scale_bias = 7 [default = false];

  optional Engine engine = 15 [default = DEFAULT];
}
// Settings for BiasLayer.
message BiasParameter {
  // First axis of bottom[0] (the first input Blob) along which bottom[1]
  // (the second input Blob) is applied. May be negative to index from the
  // end (e.g., -1 for the last axis).
  //
  // Example: for a 4D bottom[0] of shape 100x3x40x60, the output top[0] has
  // the same shape, and bottom[1] may have any of the following shapes
  // (for the given value of axis):
  //    (axis == 0 == -4) 100; 100x3; 100x3x40; 100x3x40x60
  //    (axis == 1 == -3)          3;     3x40;     3x40x60
  //    (axis == 2 == -2)                   40;       40x60
  //    (axis == 3 == -1)                                60
  // In addition, bottom[1] may have the empty shape (regardless of the
  // value of "axis") -- a scalar bias.
  optional int32 axis = 1 [default = 1];

  // Number of axes of the input (bottom[0]) covered by the bias parameter,
  // or -1 to cover all axes of bottom[0] starting from `axis`.
  // Set num_axes := 0 to add a zero-axis Blob: a scalar.
  // (num_axes is ignored unless just one bottom is given and the bias is a
  // learned parameter of the layer. Otherwise, num_axes is determined by
  // the number of axes of the second bottom.)
  optional int32 num_axes = 2 [default = 1];

  // Initialization for the learned bias parameter. The default is the zero
  // (0) initialization, so that the BiasLayer initially performs the
  // identity operation.
  // (filler is ignored unless just one bottom is given and the bias is a
  // learned parameter of the layer.)
  optional FillerParameter filler = 3;
}
// Settings for the contrastive loss.
message ContrastiveLossParameter {
  // Margin for a dissimilar pair.
  optional float margin = 1 [default = 1.0];

  // Selects between the two formulations of the cost:
  //   legacy_version = false (the default): (margin - d)^2, as proposed in
  //     Hadsell et al 2006. New models should probably use this version.
  //   legacy_version = true: (margin - d^2), which is what the first
  //     implementation of this cost used and which does not exactly match
  //     the paper. Kept to support / reproduce existing models and results.
  optional bool legacy_version = 2 [default = false];
}
// Settings for convolution layers.
message ConvolutionParameter {
  // Implementation selector.
  enum Engine {
    DEFAULT = 0;
    CAFFE = 1;
    CUDNN = 2;
  }

  // cudnn routines available for finding the best convolution algorithm.
  enum CuDNNConvolutionAlgorithmSeeker {
    GET = 0;
    FINDEX = 1;
  }

  // The number of outputs for the layer.
  optional uint32 num_output = 1;
  // Whether to have bias terms.
  optional bool bias_term = 2 [default = true];

  // Pad, kernel size, and stride are each given either as a single value
  // (equal in all spatial dimensions) or once per spatial dimension.
  // The padding size; defaults to 0.
  repeated uint32 pad = 3;
  // The kernel size.
  repeated uint32 kernel_size = 4;
  // The stride; defaults to 1.
  repeated uint32 stride = 6;
  // The dilation; defaults to 1. This is the factor used to dilate the
  // kernel, (implicitly) zero-filling the resulting holes. (Kernel dilation
  // is sometimes referred to by its use in the algorithme à trous from
  // Holschneider et al. 1987.)
  repeated uint32 dilation = 18;

  // For 2D convolution only, the *_h and *_w versions may also be used to
  // specify both spatial dimensions.
  // The padding height (2D only).
  optional uint32 pad_h = 9 [default = 0];
  // The padding width (2D only).
  optional uint32 pad_w = 10 [default = 0];
  // The kernel height (2D only).
  optional uint32 kernel_h = 11;
  // The kernel width (2D only).
  optional uint32 kernel_w = 12;
  // The stride height (2D only).
  optional uint32 stride_h = 13;
  // The stride width (2D only).
  optional uint32 stride_w = 14;

  // The group size for group conv.
  optional uint32 group = 5 [default = 1];

  // The filler for the weight.
  optional FillerParameter weight_filler = 7;
  // The filler for the bias.
  optional FillerParameter bias_filler = 8;

  optional Engine engine = 15 [default = DEFAULT];

  // The axis to interpret as "channels" when performing convolution.
  // Preceding dimensions are treated as independent inputs; succeeding
  // dimensions are treated as "spatial".
  //   * (N, C, H, W) inputs with axis == 1 (the default): N independent 2D
  //     convolutions, sliding C-channel (or (C/g)-channels, for groups g>1)
  //     filters across the spatial axes (H, W) of the input.
  //   * (N, C, D, H, W) inputs with axis == 1: N independent 3D
  //     convolutions, sliding (C/g)-channels filters across the spatial
  //     axes (D, H, W) of the input.
  optional int32 axis = 16 [default = 1];

  // Whether to force use of the general ND convolution, even if a specific
  // implementation for blobs of the appropriate number of spatial
  // dimensions is available. (Currently, there is only a 2D-specific
  // convolution implementation; for input blobs with num_axes != 2, this
  // option is ignored and the ND implementation will be used.)
  optional bool force_nd_im2col = 17 [default = false];

  // Which cudnn routine should be used to find the best convolution
  // algorithm.
  optional CuDNNConvolutionAlgorithmSeeker cudnn_convolution_algo_seeker = 19
      [default = FINDEX];

  // A non-negative value enforces using the algo with the index provided.
  // This has priority over CuDNNConvolutionAlgorithmSeeker and essentially
  // disables seeking. The index should correspond to the ordinal in the
  // structures cudnnConvolutionFwdAlgo_t, cudnnConvolutionBwdDataAlgo_t and
  // cudnnConvolutionBwdFilterAlgo_t.
  // For example, conv_algos_override set to "7,5,7" means using
  // CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED,
  // CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED and
  // CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED correspondingly.
  optional string conv_algos_override = 20 [default = "-1,-1,-1"];
}
// Settings for cropping.
//
// Cropping selects elements of the first bottom so that they fit the
// dimensions of the second (reference) bottom. Two settings configure it:
//   - `axis`   picks the dimensions to crop;
//   - `offset` sets the shift for all/each dimension, aligning the cropped
//              bottom with the reference bottom.
message CropParameter {
  // Dimensions up to but excluding `axis` are preserved; dimensions
  // including and trailing `axis` are cropped.
  // Note: standard dimensions are N,C,H,W, so the default is a spatial
  // crop. `axis` may be negative to index from the end (e.g., -1 for the
  // last axis).
  optional int32 axis = 1 [default = 2];

  // If only one `offset` is set, all dimensions are offset by this amount.
  // Otherwise the number of offsets must equal the number of cropped axes,
  // shifting the crop in each dimension accordingly.
  repeated uint32 offset = 2;
}
// Parameters for database-backed data layers.
//
// Fields documented as deprecated carry the `deprecated` option so that the
// status is visible to tooling. The option only annotates a field; numbers,
// types, defaults and wire encoding are unchanged, so existing models still
// parse.
message DataParameter {
  // Supported database backends.
  enum DB {
    LEVELDB = 0;
    LMDB = 1;
  }

  // Specify the data source.
  optional string source = 1;
  // Specify the batch size.
  optional uint32 batch_size = 4;

  // DEPRECATED. Each solver accesses a different subset of the database.
  // The rand_skip variable is for the data layer to skip a few data points
  // to avoid all asynchronous sgd clients to start at the same point. The
  // skip point would be set as rand_skip * rand(0,1). Note that rand_skip
  // should not be larger than the number of keys in the database.
  optional uint32 rand_skip = 7 [default = 0, deprecated = true];

  optional DB backend = 8 [default = LEVELDB];

  // DEPRECATED. See TransformationParameter. For data pre-processing, we
  // can do simple scaling and subtracting the data mean, if provided. Note
  // that the mean subtraction is always carried out before scaling.
  optional float scale = 2 [default = 1, deprecated = true];
  optional string mean_file = 3 [deprecated = true];

  // DEPRECATED. See TransformationParameter. Specify if we would like to
  // randomly crop an image.
  optional uint32 crop_size = 5 [default = 0, deprecated = true];

  // DEPRECATED. See TransformationParameter. Specify if we want to randomly
  // mirror data.
  optional bool mirror = 6 [default = false, deprecated = true];

  // Force the encoded image to have 3 color channels.
  optional bool force_encoded_color = 9 [default = false];

  // Deprecated.
  optional uint32 prefetch = 10 [default = 0, deprecated = true];

  // Number of Data Transformer threads per GPU. If 0 Caffe optimizes it
  // automatically. Takes effect only when used together with the
  // 'parser_threads' setting. Ignored and always set to 1 for test nets.
  optional uint32 threads = 11 [default = 0];

  // Number of Parser threads per GPU. If 0 Caffe optimizes it
  // automatically. Takes effect only when used together with the 'threads'
  // setting. Ignored and always set to 1 for test nets.
  optional uint32 parser_threads = 12 [default = 0];

  // Cache observations while reading.
  optional bool cache = 13 [default = false];

  // Shuffle observations while reading for better accuracy. Ignored if
  // 'cache' is false.
  optional bool shuffle = 14 [default = false];
}
// Settings consumed by DetectionEvaluateLayer.
message DetectionEvaluateParameter {
  // Number of classes that are actually predicted. Required!
  optional uint32 num_classes = 1;

  // Label id of the background class. Needed for the sanity check that the
  // background class appears neither in the ground truth nor in the
  // detections.
  optional uint32 background_label_id = 2 [default = 0];

  // Threshold for deciding true/false positive.
  optional float overlap_threshold = 3 [default = 0.5];

  // When true, difficult ground truth is also considered for evaluation.
  optional bool evaluate_difficult_gt = 4 [default = true];

  // A file listing names and sizes in the same order as the input DB, in
  // the following format:
  //    name height width
  //    ...
  // When provided, the prediction and ground truth NormalizedBBox are
  // scaled for evaluation.
  optional string name_size_file = 5;

  // The resize parameter used in converting NormalizedBBox to the original
  // image.
  optional ResizeParameter resize_param = 6;

  optional int32 num_keypoint = 7 [default = 0];
}
// Settings for non maximum suppression (nms).
message NonMaximumSuppressionParameter {
  optional float nms_threshold = 1 [default = 0.3];  // Threshold used in nms
  optional int32 top_k = 2;  // Maximum number of results to be kept
  optional float eta = 3 [default = 1.0];  // Parameter for adaptive nms
}
// Settings for saving detection results.
message SaveOutputParameter {
  // Output directory. When not empty, the results are saved.
  optional string output_directory = 1;

  // Prefix for output names.
  optional string output_name_prefix = 2;

  // Output format:
  //    VOC  - PASCAL VOC output format.
  //    COCO - MS COCO output format.
  optional string output_format = 3;

  // To output results, the following two files (label_map_file and
  // name_size_file) must also be provided; otherwise saving results is
  // ignored.

  // Label map file.
  optional string label_map_file = 4;

  // A file listing names and sizes in the same order as the input DB, in
  // the following format:
  //    name height width
  //    ...
  optional string name_size_file = 5;

  // Number of test images. May be less than the number of lines in
  // name_size_file, for example when evaluating on only part of the test
  // images.
  optional uint32 num_test_image = 6;

  // The resize parameter used in saving the data.
  optional ResizeParameter resize_param = 7;
}
// Settings consumed by DetectionOutputLayer.
message DetectionOutputParameter {
  // Number of classes to be predicted. Required!
  optional uint32 num_classes = 1;

  // When true, bounding boxes are shared among different classes.
  optional bool share_location = 2 [default = true];

  // Background label id. Set it to -1 if there is no background class.
  optional int32 background_label_id = 3 [default = 0];

  // Parameters used for non maximum suppression.
  optional NonMaximumSuppressionParameter nms_param = 4;

  // Parameters used for saving detection results.
  optional SaveOutputParameter save_output_param = 5;

  // Type of coding method for bbox.
  optional PriorBoxParameter.CodeType code_type = 6 [default = CORNER];

  // When true, variance is encoded in the target; otherwise the predicted
  // offset needs to be adjusted accordingly.
  optional bool variance_encoded_in_target = 8 [default = false];

  // Total number of bboxes kept per image after the nms step.
  // -1 means keeping all bboxes after the nms step.
  optional int32 keep_top_k = 7 [default = -1];

  // Only detections whose confidences are larger than this threshold are
  // considered. When not provided, all boxes are considered.
  optional float confidence_threshold = 9;

  // When true, the detection results are visualized.
  optional bool visualize = 10 [default = false];

  // The threshold used to visualize the detection results.
  optional float visualize_threshold = 11;

  // When provided, outputs are saved to this video file.
  optional string save_file = 12;

  // Total number of keypoints predicted along with each bounding box.
  optional uint32 num_keypoint = 13 [default = 0];
}
// Settings for dropout.
message DropoutParameter {
  // Implementation selector.
  enum Engine {
    DEFAULT = 0;
    CAFFE = 1;
    CUDNN = 2;
  }

  // Dropout ratio.
  optional float dropout_ratio = 1 [default = 0.5];

  optional Engine engine = 2 [default = DEFAULT];

  optional int64 random_seed = 3 [default = -1];
}
// DummyDataLayer fills any number of arbitrarily shaped blobs with random
// (or constant) data generated by "Fillers" (see "message FillerParameter").
message DummyDataParameter {
  // This layer produces N >= 1 top blobs. DummyDataParameter must specify 1
  // or N shape fields, and 0, 1 or N data_fillers.
  //
  // If 0 data_fillers are specified, ConstantFiller with a value of 0 is
  // used. If 1 data_filler is specified, it is applied to all top blobs. If
  // N are specified, the ith is applied to the ith top blob.
  repeated FillerParameter data_filler = 1;
  repeated BlobShape shape = 6;

  // 4D dimensions -- deprecated. Use "shape" instead.
  // The deprecated option only annotates these fields; their numbers, types
  // and wire encoding are unchanged, so existing models still parse.
  repeated uint32 num = 2 [deprecated = true];
  repeated uint32 channels = 3 [deprecated = true];
  repeated uint32 height = 4 [deprecated = true];
  repeated uint32 width = 5 [deprecated = true];
}
// Element-wise operation settings.
message EltwiseParameter {
  // Supported element-wise operations.
  enum EltwiseOp {
    PROD = 0;
    SUM = 1;
    MAX = 2;
  }

  // Element-wise operation to apply.
  optional EltwiseOp operation = 1 [default = SUM];
  // Blob-wise coefficient for the SUM operation.
  repeated float coeff = 2;

  // Selects a stabler, though asymptotically slower (for >2 inputs), method
  // of computing the gradient for the PROD operation. No effect for SUM op.
  optional bool stable_prod_grad = 3 [default = true];
}
// Parameters consumed by ELULayer.
message ELUParameter {
  // Reference:
  // Clevert, D.-A., Unterthiner, T., & Hochreiter, S. (2015). Fast and Accurate
  // Deep Network Learning by Exponential Linear Units (ELUs). arXiv
  optional float alpha = 1 [default = 1.0];
  optional float lambda = 2 [default = 1.0];
}
// Message that stores parameters used by EmbedLayer
message EmbedParameter {
  // The number of outputs for the layer.
  optional uint32 num_output = 1;
  // The input is given as integers to be interpreted as one-hot
  // vector indices with dimension input_dim. Hence input_dim should be
  // 1 greater than the maximum possible input value.
  optional uint32 input_dim = 2;

  // Whether to use a bias term.
  optional bool bias_term = 3 [default = true];
  // The filler for the weight.
  optional FillerParameter weight_filler = 4;
  // The filler for the bias.
  optional FillerParameter bias_filler = 5;
}
// Parameters consumed by ExpLayer.
message ExpParameter {
  // For base > 0, ExpLayer produces y = base ^ (shift + scale * x).
  // When base is left at its default (-1), base is set to e instead,
  // so that y = exp(shift + scale * x).
  optional float base = 1 [default = -1.0];
  optional float scale = 2 [default = 1.0];
  optional float shift = 3 [default = 0.0];
}
// Parameters consumed by FlattenLayer.
message FlattenParameter {
  // First axis to flatten; every preceding axis is retained in the output.
  // Negative values index from the end (e.g., -1 for the last axis).
  optional int32 axis = 1 [default = 1];

  // Last axis to flatten; every following axis is retained in the output.
  // Negative values index from the end (e.g., the default -1 for the last
  // axis).
  optional int32 end_axis = 2 [default = -1];
}
// Parameters consumed by HDF5DataLayer.
message HDF5DataParameter {
  // The data source.
  optional string source = 1;
  // The batch size.
  optional uint32 batch_size = 2;

  // Whether to shuffle the data.
  // With shuffle == true the ordering of the HDF5 files is shuffled, and so
  // is the ordering of data within any given HDF5 file. Data from different
  // files is not interleaved, however: all of a file's data is output (in a
  // random order) before moving on to another file.
  optional bool shuffle = 3 [default = false];
}
// Carries a single optional file name.
message HDF5OutputParameter {
  optional string file_name = 1;
}
// Norm selection for the hinge loss.
message HingeLossParameter {
  // Available norms.
  enum Norm {
    L1 = 1;
    L2 = 2;
  }

  // Which norm to use: L1 or L2.
  optional Norm norm = 1 [default = L1];
}
// Image data settings: source, batching, resizing, threading and caching.
message ImageDataParameter {
  // The data source.
  optional string source = 1;
  // The batch size.
  optional uint32 batch_size = 4 [default = 1];
  // rand_skip lets the data layer skip a few data points so that
  // asynchronous sgd clients do not all start at the same point. The skip
  // point is set to rand_skip * rand(0,1). Note that rand_skip should not
  // be larger than the number of keys in the database.
  optional uint32 rand_skip = 7 [default = 0];
  // Whether ImageLayer should shuffle the list of files at every epoch.
  optional bool shuffle = 8 [default = false];
  // Images are also resized when new_height or new_width is not zero.
  optional uint32 new_height = 9 [default = 0];
  optional uint32 new_width = 10 [default = 0];
  // Whether the images are color or gray.
  optional bool is_color = 11 [default = true];
  // DEPRECATED. See TransformationParameter. For data pre-processing, simple
  // scaling and subtraction of the data mean (if provided) can be done. The
  // mean subtraction is always carried out before scaling.
  optional float scale = 2 [default = 1];
  optional string mean_file = 3;
  // DEPRECATED. See TransformationParameter. Whether to randomly crop an
  // image.
  optional uint32 crop_size = 5 [default = 0];
  // DEPRECATED. See TransformationParameter. Whether to randomly mirror
  // data.
  optional bool mirror = 6 [default = false];
  optional string root_folder = 12 [default = ""];
  // Resize while reading to adjust images by short side.
  // Takes priority over new_height and new_width.
  optional uint32 short_side = 13 [default = 0];
  // Number of Data Transformer threads per GPU.
  optional uint32 threads = 14 [default = 4];
  // Whether ImageLayer should cache all files in RAM. Caching might fail if
  // the files do not fit.
  optional bool cache = 15 [default = false];
}
// Infogain loss settings.
message InfogainLossParameter {
  // Source of the infogain matrix.
  optional string source = 1;
}
// Inner product settings: output count, bias, fillers, axis and transpose.
message InnerProductParameter {
  // The number of outputs for the layer.
  optional uint32 num_output = 1;
  // Whether to have bias terms.
  optional bool bias_term = 2 [default = true];
  // The filler for the weight.
  optional FillerParameter weight_filler = 3;
  // The filler for the bias.
  optional FillerParameter bias_filler = 4;

  // First axis to be lumped into a single inner product computation; every
  // preceding axis is retained in the output.
  // Negative values index from the end (e.g., -1 for the last axis).
  optional int32 axis = 5 [default = 1];
  // Whether to transpose the weight matrix or not.
  // With transpose == true, any operations are performed on the transpose
  // of the weight matrix. The weight matrix itself is not transposed;
  // rather, the transfer flag of operations is toggled accordingly.
  optional bool transpose = 6 [default = false];
}
// Shapes for the top blob(s) of an input layer.
message InputParameter {
  // The layer yields N >= 1 top blob(s) that are assigned manually.
  //   - N shapes: one shape for each top.
  //   - 1 shape: the same shape for every top.
  //   - no shape: defer to reshaping manually.
  repeated BlobShape shape = 1;
}
// Parameters consumed by LogLayer.
message LogParameter {
  // For base > 0, LogLayer produces y = log_base(shift + scale * x).
  // When base is left at its default (-1), base is set to e instead,
  // so that y = ln(shift + scale * x) = log_e(shift + scale * x).
  optional float base = 1 [default = -1.0];
  optional float scale = 2 [default = 1.0];
  optional float shift = 3 [default = 0.0];
}
// Parameters consumed by LRNLayer.
message LRNParameter {
  // Normalization region choices.
  enum NormRegion {
    ACROSS_CHANNELS = 0;
    WITHIN_CHANNEL = 1;
  }
  // Engine choices.
  enum Engine {
    DEFAULT = 0;
    CAFFE = 1;
    CUDNN = 2;
  }

  optional uint32 local_size = 1 [default = 5];
  optional float alpha = 2 [default = 1.0];
  optional float beta = 3 [default = 0.75];
  optional NormRegion norm_region = 4 [default = ACROSS_CHANNELS];
  optional float k = 5 [default = 1.0];
  optional Engine engine = 6 [default = DEFAULT];
}
// Batch size plus channels/height/width values for memory data.
message MemoryDataParameter {
  optional uint32 batch_size = 1;
  optional uint32 channels = 2;
  optional uint32 height = 3;
  optional uint32 width = 4;
}
// Message that stores parameters used by MultiBoxLossLayer
message MultiBoxLossParameter {
  // Localization loss type.
  enum LocLossType {
    L2 = 0;
    SMOOTH_L1 = 1;
  }
  optional LocLossType loc_loss_type = 1 [default = SMOOTH_L1];
  // Confidence loss type.
  enum ConfLossType {
    SOFTMAX = 0;
    LOGISTIC = 1;
  }
  optional ConfLossType conf_loss_type = 2 [default = SOFTMAX];
  // Weight for localization loss.
  optional float loc_weight = 3 [default = 1.0];
  // Number of classes to be predicted. Required!
  optional uint32 num_classes = 4;
  // If true, bounding boxes are shared among different classes.
  optional bool share_location = 5 [default = true];
  // Matching method during training.
  enum MatchType {
    BIPARTITE = 0;
    PER_PREDICTION = 1;
  }
  optional MatchType match_type = 6 [default = PER_PREDICTION];
  // If match_type is PER_PREDICTION, use overlap_threshold to
  // determine the extra matching bboxes.
  optional float overlap_threshold = 7 [default = 0.5];
  // Use prior for matching.
  optional bool use_prior_for_matching = 8 [default = true];
  // Background label id.
  optional uint32 background_label_id = 9 [default = 0];
  // If true, also consider difficult ground truth.
  optional bool use_difficult_gt = 10 [default = true];
  // If true, perform negative mining.
  // DEPRECATED: use mining_type instead.
  optional bool do_neg_mining = 11;
  // The negative/positive ratio.
  optional float neg_pos_ratio = 12 [default = 3.0];
  // The negative overlap upper bound for the unmatched predictions.
  optional float neg_overlap = 13 [default = 0.5];
  // Type of coding method for bbox.
  optional PriorBoxParameter.CodeType code_type = 14 [default = CORNER];
  // If true, encode the variance of prior box in the loc loss target instead of
  // in bbox.
  optional bool encode_variance_in_target = 16 [default = false];
  // If true, map all object classes to agnostic class. It is useful for learning
  // objectness detector.
  optional bool map_object_to_agnostic = 17 [default = false];
  // If true, ignore cross boundary bbox during matching.
  // A cross boundary bbox is a bbox that is outside of the image region.
  optional bool ignore_cross_boundary_bbox = 18 [default = false];
  // If true, only backpropagate on corners which are inside of the image
  // region when code_type is CORNER or CORNER_SIZE.
  optional bool bp_inside = 19 [default = false];
  // Mining type during training.
  //   NONE : use all negatives.
  //   MAX_NEGATIVE : select negatives based on the score.
  //   HARD_EXAMPLE : select hard examples based on "Training Region-based
  //     Object Detectors with Online Hard Example Mining", Shrivastava et al.
  enum MiningType {
    NONE = 0;
    MAX_NEGATIVE = 1;
    HARD_EXAMPLE = 2;
  }
  optional MiningType mining_type = 20 [default = MAX_NEGATIVE];
  // Parameters used for non maximum suppression during hard example mining.
  optional NonMaximumSuppressionParameter nms_param = 21;
  optional int32 sample_size = 22 [default = 64];
  optional bool use_prior_for_nms = 23 [default = false];
  optional bool ignore_difficult_gt = 24 [default = false];
}
// MVN settings: variance normalization, channel handling and epsilon.
message MVNParameter {
  // Set to false to normalize the mean only.
  optional bool normalize_variance = 1 [default = true];

  // Set to true to perform DNN-like MVN.
  optional bool across_channels = 2 [default = false];

  // Epsilon guarding against division by zero while normalizing variance.
  optional float eps = 3 [default = 1e-9];
}
// Parameters consumed by NormalizeLayer.
message NormalizeParameter {
  optional bool across_spatial = 1 [default = true];
  // Initial value of scale. Default is 1.0 for all.
  optional FillerParameter scale_filler = 2;
  // Whether the scale parameters are shared across channels.
  optional bool channel_shared = 3 [default = true];
  // Epsilon guarding against division by zero while normalizing variance.
  optional float eps = 4 [default = 1e-10];
}
// Axis permutation settings.
message PermuteParameter {
  // New order of the axes of the data. Values start from 0 and should stay
  // within the same range as the input data. Do not repeat an order value.
  repeated uint32 order = 1;
}
// Pooling settings: method, padding, kernel, stride and engine.
message PoolingParameter {
  // Pooling methods.
  enum PoolMethod {
    MAX = 0;
    AVE = 1;
    STOCHASTIC = 2;
  }
  // Engine choices.
  enum Engine {
    DEFAULT = 0;
    CAFFE = 1;
    CUDNN = 2;
  }

  // The pooling method.
  optional PoolMethod pool = 1 [default = MAX];
  // Pad, kernel size, and stride are each given either as a single value
  // (equal dimensions in height and width) or as Y, X pairs.
  // The padding size (equal in Y, X).
  optional uint32 pad = 4 [default = 0];
  // The padding height.
  optional uint32 pad_h = 9 [default = 0];
  // The padding width.
  optional uint32 pad_w = 10 [default = 0];
  // The kernel size (square).
  optional uint32 kernel_size = 2;
  // The kernel height.
  optional uint32 kernel_h = 5;
  // The kernel width.
  optional uint32 kernel_w = 6;
  // The stride (equal in Y, X).
  optional uint32 stride = 3 [default = 1];
  // The stride height.
  optional uint32 stride_h = 7;
  // The stride width.
  optional uint32 stride_w = 8;
  optional Engine engine = 11 [default = DEFAULT];
  // With global_pooling, pooling covers the size of the bottom by doing
  // kernel_h = bottom->height and kernel_w = bottom->width.
  optional bool global_pooling = 12 [default = false];

  // FB pooling parameters.
  // Use floor((height + 2 * padding - kernel) / stride) + 1
  // instead of ceil((height + 2 * padding - kernel) / stride) + 1.
  optional bool torch_pooling = 40 [default = false];
}
// Power, scale and shift values for PowerLayer.
message PowerParameter {
  // PowerLayer produces y = (shift + scale * x) ^ power.
  optional float power = 1 [default = 1.0];
  optional float scale = 2 [default = 1.0];
  optional float shift = 3 [default = 0.0];
}
// Parameters consumed by PriorBoxLayer.
message PriorBoxParameter {
  // Encode/decode type.
  enum CodeType {
    CORNER = 1;
    CENTER_SIZE = 2;
    CORNER_SIZE = 3;
  }

  // Minimum box size (in pixels). Required!
  repeated float min_size = 1;
  // Maximum box size (in pixels). Required!
  repeated float max_size = 2;
  // Aspect ratios. Duplicate ratios are ignored.
  // When none is provided, the default ratio 1 is used.
  repeated float aspect_ratio = 3;
  // If true, each aspect ratio is flipped: for an aspect ratio "r",
  // the aspect ratio "1.0/r" is generated as well.
  optional bool flip = 4 [default = true];
  // If true, the prior is clipped so that it lies within [0, 1].
  optional bool clip = 5 [default = false];
  // Variance for adjusting the prior bboxes.
  repeated float variance = 6;
  // By default img_height, img_width, step_x and step_y are calculated from
  // bottom[0] (feat) and bottom[1] (img), unless these values are
  // explicitly provided.
  // Explicitly provide the img_size.
  optional uint32 img_size = 7;
  // Either img_size or img_h/img_w should be specified; not both.
  optional uint32 img_h = 8;
  optional uint32 img_w = 9;

  // Explicitly provide the step size.
  optional float step = 10;
  // Either step or step_h/step_w should be specified; not both.
  optional float step_h = 11;
  optional float step_w = 12;

  // Offset to the top left corner of each cell.
  optional float offset = 13 [default = 0.5];
  optional uint32 num_keypoint = 14 [default = 0];
}
// Module, layer name and free-form parameter string for a PythonLayer.
message PythonParameter {
  optional string module = 1;
  optional string layer = 2;
  // Assigned to the `param_str` attribute of the `PythonLayer` object in
  // Python before its `setup()` method is called. It may be a number, a
  // string, a dictionary in Python dict format, JSON, etc. The string can be
  // parsed in the `setup` method and used in `forward` and `backward`.
  optional string param_str = 3 [default = ""];
  // Whether this PythonLayer is shared among worker solvers during data
  // parallelism. If true, each worker solver sequentially runs forward from
  // this layer. Should be set to true when it is used as a data layer.
  optional bool share_in_parallel = 4 [default = false];
}
// Parameters consumed by RecurrentLayer.
message RecurrentParameter {
  // Dimension of the output (and usually hidden state) representation --
  // it must be explicitly set to non-zero.
  optional uint32 num_output = 1 [default = 0];

  // The filler for the weight.
  optional FillerParameter weight_filler = 2;
  // The filler for the bias.
  optional FillerParameter bias_filler = 3;

  // Whether to enable displaying debug_info in the unrolled recurrent net.
  optional bool debug_info = 4 [default = false];

  // Whether the initial hidden state blobs are added as additional inputs
  // (bottoms) and the final timestep hidden state blobs as additional
  // outputs (tops). How many additional bottom/top blobs are required depends
  // on the recurrent architecture -- e.g., 1 for RNNs, 2 for LSTMs.
  optional bool expose_hidden = 5 [default = false];
}
// Parameters consumed by ReductionLayer.
message ReductionParameter {
  // Supported reduction operations.
  enum ReductionOp {
    SUM = 1;
    ASUM = 2;
    SUMSQ = 3;
    MEAN = 4;
  }

  // Reduction operation.
  optional ReductionOp operation = 1 [default = SUM];

  // First axis to reduce to a scalar -- negative values index from the end
  // (e.g., -1 for the last axis).
  // (Currently only reduction along ALL "tail" axes is supported; reduction
  // of axis M through N, where N < num_axes - 1, is unsupported.)
  // Take an n-axis bottom Blob with shape:
  //     (d0, d1, d2, ..., d(m-1), dm, d(m+1), ..., d(n-1)).
  // With axis == m, the output Blob has shape
  //     (d0, d1, d2, ..., d(m-1)),
  // and the ReductionOp operation is performed (d0 * d1 * d2 * ... * d(m-1))
  // times, each over (dm * d(m+1) * ... * d(n-1)) individual data.
  // With axis == 0 (the default), the output Blob always has the empty shape
  // (count 1), reducing across the entire input -- often useful for creating
  // new loss functions.
  optional int32 axis = 2 [default = 0];

  // Coefficient for output.
  optional float coeff = 3 [default = 1.0];
}
// Parameters consumed by ReLULayer.
message ReLUParameter {
  // Engine choices.
  enum Engine {
    DEFAULT = 0;
    CAFFE = 1;
    CUDNN = 2;
  }

  // Permits a non-zero slope for negative inputs to speed up optimization.
  // Reference:
  // Maas, A. L., Hannun, A. Y., & Ng, A. Y. (2013). Rectifier nonlinearities
  // improve neural network acoustic models. In ICML Workshop on Deep Learning
  // for Audio, Speech, and Language Processing.
  optional float negative_slope = 1 [default = 0];
  optional Engine engine = 2 [default = DEFAULT];
}
// Output shape specification for a Reshape layer, plus the portion of the
// input shape (axis, num_axes) that the specification replaces.
message ReshapeParameter {
  // Specify the output dimensions. If some of the dimensions are set to 0,
  // the corresponding dimension from the bottom layer is used (unchanged).
  // Exactly one dimension may be set to -1, in which case its value is
  // inferred from the count of the bottom blob and the remaining dimensions.
  // For example, suppose we want to reshape a 2D blob "input" with shape 2 x 8:
  //
  //   layer {
  //     type: "Reshape" bottom: "input" top: "output"
  //     reshape_param { ... }
  //   }
  //
  // If "input" is 2D with shape 2 x 8, then the following reshape_param
  // specifications are all equivalent, producing a 3D blob "output" with shape
  // 2 x 2 x 4:
  //
  //   reshape_param { shape { dim:  2  dim: 2  dim:  4 } }
  //   reshape_param { shape { dim:  0  dim: 2  dim:  4 } }
  //   reshape_param { shape { dim:  0  dim: 2  dim: -1 } }
  //   reshape_param { shape { dim:  0  dim:-1  dim:  4 } }
  //
  optional BlobShape shape = 1;

  // axis and num_axes control the portion of the bottom blob's shape that is
  // replaced by (included in) the reshape. By default (axis == 0 and
  // num_axes == -1), the entire bottom blob shape is included in the reshape,
  // and hence the shape field must specify the entire output shape.
  //
  // axis may be non-zero to retain some portion of the beginning of the input
  // shape (and may be negative to index from the end; e.g., -1 to begin the
  // reshape after the last axis, including nothing in the reshape,
  // -2 to include only the last axis, etc.).
  //
  // For example, suppose "input" is a 2D blob with shape 2 x 8.
  // Then the following ReshapeLayer specifications are all equivalent,
  // producing a blob "output" with shape 2 x 2 x 4:
  //
  //   reshape_param { shape { dim: 2  dim: 2  dim: 4 } }
  //   reshape_param { shape {         dim: 2  dim: 4 } axis:  1 }
  //   reshape_param { shape {         dim: 2  dim: 4 } axis: -2 }
  //
  // num_axes specifies the extent of the reshape.
  // If num_axes >= 0 (and axis >= 0), the reshape will be performed only on
  // input axes in the half-open range [axis, axis+num_axes); num_axes == 0
  // therefore replaces no input axis and only inserts the given dimensions.
  // num_axes may also be -1, the default, to include all remaining axes
  // (starting from axis).
  //
  // For example, suppose "input" is a 2D blob with shape 2 x 8.
  // Then the following ReshapeLayer specifications are equivalent,
  // producing a blob "output" with shape 1 x 2 x 8.
  //
  //   reshape_param { shape { dim:  1  dim: 2  dim:  8 } }
  //   reshape_param { shape { dim:  1  dim: 2  }  num_axes: 1 }
  //   reshape_param { shape { dim:  1  }  num_axes: 0 }
  //
  // On the other hand, these would produce output blob shape 2 x 1 x 8:
  //
  //   reshape_param { shape { dim: 2  dim: 1  dim: 8  }  }
  //   reshape_param { shape { dim: 1 }  axis: 1  num_axes: 0 }
  //
  optional int32 axis = 2 [default = 0];
  optional int32 num_axes = 3 [default = -1];
}
// Scale settings: axis, extent, fillers and optional bias.
message ScaleParameter {
  // First axis of bottom[0] (the first input Blob) along which bottom[1]
  // (the second input Blob) is applied. Negative values index from the end
  // (e.g., -1 for the last axis).
  //
  // Example: when bottom[0] is 4D with shape 100x3x40x60, the output top[0]
  // has the same shape, and bottom[1] may have any of the following shapes
  // (for the given value of axis):
  //    (axis == 0 == -4) 100; 100x3; 100x3x40; 100x3x40x60
  //    (axis == 1 == -3)          3;     3x40;     3x40x60
  //    (axis == 2 == -2)                   40;       40x60
  //    (axis == 3 == -1)                                60
  // In addition, bottom[1] may have the empty shape (regardless of the value
  // of "axis") -- a scalar multiplier.
  optional int32 axis = 1 [default = 1];

  // (num_axes is ignored unless just one bottom is given and the scale is
  // a learned parameter of the layer. Otherwise, num_axes is determined by the
  // number of axes by the second bottom.)
  // Number of axes of the input (bottom[0]) covered by the scale parameter,
  // or -1 to cover all axes of bottom[0] starting from `axis`.
  // Set num_axes := 0 to multiply with a zero-axis Blob: a scalar.
  optional int32 num_axes = 2 [default = 1];

  // (filler is ignored unless just one bottom is given and the scale is
  // a learned parameter of the layer.)
  // Initialization for the learned scale parameter.
  // The default is the unit (1) initialization, so that the ScaleLayer
  // initially performs the identity operation.
  optional FillerParameter filler = 3;

  // Whether to also learn a bias (equivalent to a ScaleLayer+BiasLayer, but
  // may be more efficient). Initialized with bias_filler (defaults to 0).
  optional bool bias_term = 4 [default = false];
  optional FillerParameter bias_filler = 5;
}
// Engine selection for the sigmoid.
message SigmoidParameter {
  // Engine choices.
  enum Engine {
    DEFAULT = 0;
    CAFFE = 1;
    CUDNN = 2;
  }

  optional Engine engine = 1 [default = DEFAULT];
}
// Slice settings: the axis to slice along and the slice points.
message SliceParameter {
  // The axis along which to slice -- may be negative to index from the end
  // (e.g., -1 for the last axis).
  // By default, SliceLayer slices blobs along the "channels" axis (1).
  optional int32 axis = 3 [default = 1];
  repeated uint32 slice_point = 2;

  // DEPRECATED: alias for "axis" -- does not support negative indexing.
  optional uint32 slice_dim = 1 [default = 1];
}
// Parameters consumed by SoftmaxLayer and SoftmaxWithLossLayer.
message SoftmaxParameter {
  // Engine choices.
  enum Engine {
    DEFAULT = 0;
    CAFFE = 1;
    CUDNN = 2;
  }

  optional Engine engine = 1 [default = DEFAULT];

  // Axis along which the softmax is performed -- negative values index from
  // the end (e.g., -1 for the last axis).
  // All other axes are evaluated as independent softmaxes.
  optional int32 axis = 2 [default = 1];
}
// Engine selection for TanH.
message TanHParameter {
  // Engine choices.
  enum Engine {
    DEFAULT = 0;
    CAFFE = 1;
    CUDNN = 2;
  }

  optional Engine engine = 1 [default = DEFAULT];
}
// Parameters consumed by TileLayer.
message TileParameter {
  // Index of the axis to tile.
  optional int32 axis = 1 [default = 1];

  // Number of copies (tiles) of the blob to output.
  optional int32 tiles = 2;
}
// Parameters consumed by ThresholdLayer.
message ThresholdParameter {
  // Strictly positive values
  optional float threshold = 1 [default = 0];
}
// Video input settings: source type, device, file and frame skipping.
message VideoDataParameter {
  // Kinds of video source.
  enum VideoType {
    WEBCAM = 0;
    VIDEO = 1;
  }

  optional VideoType video_type = 1 [default = WEBCAM];
  optional int32 device_id = 2 [default = 0];
  optional string video_file = 3;
  // How many frames are skipped before a frame is processed.
  optional uint32 skip_frames = 4 [default = 0];
}
// Window data settings: source, pre-processing, thresholds and cropping.
message WindowDataParameter {
  // The data source.
  optional string source = 1;
  // For data pre-processing, simple scaling and subtraction of the data mean
  // (if provided) can be done. The mean subtraction is always carried out
  // before scaling.
  optional float scale = 2 [default = 1];
  optional string mean_file = 3;
  // The batch size.
  optional uint32 batch_size = 4;
  // Whether to randomly crop an image.
  optional uint32 crop_size = 5 [default = 0];
  // Whether to randomly mirror data.
  optional bool mirror = 6 [default = false];
  // Foreground (object) overlap threshold.
  optional float fg_threshold = 7 [default = 0.5];
  // Background (non-object) overlap threshold.
  optional float bg_threshold = 8 [default = 0.5];
  // Fraction of the batch that should be foreground objects.
  optional float fg_fraction = 9 [default = 0.25];
  // Amount of contextual padding to add around a window
  // (used only by the window_data_layer).
  optional uint32 context_pad = 10 [default = 0];
  // Mode for cropping out a detection window:
  //   warp: cropped window is warped to a fixed size and aspect ratio
  //   square: the tightest square around the window is cropped
  optional string crop_mode = 11 [default = "warp"];
  // cache_images: will load all images in memory for faster access.
  optional bool cache_images = 12 [default = false];
  // Append root_folder to locate images.
  optional string root_folder = 13 [default = ""];
}
// SPP settings: pyramid height, pooling method and engine.
message SPPParameter {
  // Pooling methods.
  enum PoolMethod {
    MAX = 0;
    AVE = 1;
    STOCHASTIC = 2;
  }
  // Engine choices.
  enum Engine {
    DEFAULT = 0;
    CAFFE = 1;
    CUDNN = 2;
  }

  optional uint32 pyramid_height = 1;
  // The pooling method.
  optional PoolMethod pool = 2 [default = MAX];
  optional Engine engine = 6 [default = DEFAULT];
}
// DEPRECATED: use LayerParameter.
message V1LayerParameter {
  // Layer type identifiers.
  enum LayerType {
    NONE = 0;
    ABSVAL = 35;
    ACCURACY = 1;
    ARGMAX = 30;
    BNLL = 2;
    CONCAT = 3;
    CONTRASTIVE_LOSS = 37;
    CONVOLUTION = 4;
    DATA = 5;
    DECONVOLUTION = 39;
    DROPOUT = 6;
    DUMMY_DATA = 32;
    EUCLIDEAN_LOSS = 7;
    ELTWISE = 25;
    EXP = 38;
    FLATTEN = 8;
    HDF5_DATA = 9;
    HDF5_OUTPUT = 10;
    HINGE_LOSS = 28;
    IM2COL = 11;
    IMAGE_DATA = 12;
    INFOGAIN_LOSS = 13;
    INNER_PRODUCT = 14;
    LRN = 15;
    MEMORY_DATA = 29;
    MULTINOMIAL_LOGISTIC_LOSS = 16;
    MVN = 34;
    POOLING = 17;
    POWER = 26;
    RELU = 18;
    SIGMOID = 19;
    SIGMOID_CROSS_ENTROPY_LOSS = 27;
    SILENCE = 36;
    SOFTMAX = 20;
    SOFTMAX_LOSS = 21;
    SPLIT = 22;
    SLICE = 33;
    TANH = 23;
    WINDOW_DATA = 24;
    THRESHOLD = 31;
  }
  // Values accepted by blob_share_mode.
  enum DimCheckMode {
    STRICT = 0;
    PERMISSIVE = 1;
  }

  // Bottom/top names, layer name and include/exclude rules.
  repeated string bottom = 2;
  repeated string top = 3;
  optional string name = 4;
  repeated NetStateRule include = 32;
  repeated NetStateRule exclude = 33;

  // Layer type and blobs.
  optional LayerType type = 5;
  repeated BlobProto blobs = 6;
  repeated string param = 1001;
  repeated DimCheckMode blob_share_mode = 1002;
  repeated float blobs_lr = 7;
  repeated float weight_decay = 8;
  repeated float loss_weight = 35;

  // Parameter messages, one optional field per parameter type.
  optional AccuracyParameter accuracy_param = 27;
  optional ArgMaxParameter argmax_param = 23;
  optional ConcatParameter concat_param = 9;
  optional ContrastiveLossParameter contrastive_loss_param = 40;
  optional ConvolutionParameter convolution_param = 10;
  optional DataParameter data_param = 11;
  optional DropoutParameter dropout_param = 12;
  optional DummyDataParameter dummy_data_param = 26;
  optional EltwiseParameter eltwise_param = 24;
  optional ExpParameter exp_param = 41;
  optional HDF5DataParameter hdf5_data_param = 13;
  optional HDF5OutputParameter hdf5_output_param = 14;
  optional HingeLossParameter hinge_loss_param = 29;
  optional ImageDataParameter image_data_param = 15;
  optional InfogainLossParameter infogain_loss_param = 16;
  optional InnerProductParameter inner_product_param = 17;
  optional LRNParameter lrn_param = 18;
  optional MemoryDataParameter memory_data_param = 22;
  optional MVNParameter mvn_param = 34;
  optional PoolingParameter pooling_param = 19;
  optional PowerParameter power_param = 21;
  optional ReLUParameter relu_param = 30;
  optional SigmoidParameter sigmoid_param = 38;
  optional SoftmaxParameter softmax_param = 39;
  optional SliceParameter slice_param = 31;
  optional TanHParameter tanh_param = 37;
  optional ThresholdParameter threshold_param = 25;
  optional WindowDataParameter window_data_param = 20;
  optional TransformationParameter transform_param = 36;
  optional LossParameter loss_param = 42;

  // Embedded V0LayerParameter.
  optional V0LayerParameter layer = 1;
}
// DEPRECATED: V0LayerParameter is the old way of specifying layer parameters
// in Caffe. This message type is kept around purely for legacy support.
message V0LayerParameter {
  // The layer name.
  optional string name = 1;
  // The string to specify the layer type.
  optional string type = 2;

  // --- Parameters to specify layers with inner products. ---
  // The number of outputs for the layer.
  optional uint32 num_output = 3;
  // Whether to have bias terms.
  optional bool biasterm = 4 [default = true];
  // The filler for the weight.
  optional FillerParameter weight_filler = 5;
  // The filler for the bias.
  optional FillerParameter bias_filler = 6;

  // The padding size.
  optional uint32 pad = 7 [default = 0];
  // The kernel size.
  optional uint32 kernelsize = 8;
  // The group size for group conv.
  optional uint32 group = 9 [default = 1];
  // The stride.
  optional uint32 stride = 10 [default = 1];

  enum PoolMethod {
    MAX = 0;
    AVE = 1;
    STOCHASTIC = 2;
  }
  // The pooling method.
  optional PoolMethod pool = 11 [default = MAX];
  // Dropout ratio.
  optional float dropout_ratio = 12 [default = 0.5];

  // For local response norm.
  optional uint32 local_size = 13 [default = 5];
  // For local response norm.
  optional float alpha = 14 [default = 1.0];
  // For local response norm.
  optional float beta = 15 [default = 0.75];
  optional float k = 22 [default = 1.0];

  // For data layers, specify the data source.
  optional string source = 16;
  // For data pre-processing, simple scaling and subtraction of the data mean
  // (if provided) can be applied. Note that the mean subtraction is always
  // carried out before scaling.
  optional float scale = 17 [default = 1];
  optional string meanfile = 18;
  // For data layers, specify the batch size.
  optional uint32 batchsize = 19;
  // For data layers, specify if we would like to randomly crop an image.
  optional uint32 cropsize = 20 [default = 0];
  // For data layers, specify if we want to randomly mirror data.
  optional bool mirror = 21 [default = false];

  // The blobs containing the numeric parameters of the layer.
  repeated BlobProto blobs = 50;
  // The ratio that is multiplied on the global learning rate. If you want to
  // set the learning ratio for one blob, you need to set it for all blobs.
  repeated float blobs_lr = 51;
  // The weight decay that is multiplied on the global weight decay.
  repeated float weight_decay = 52;

  // rand_skip lets the data layer skip a few data points so that asynchronous
  // SGD clients do not all start at the same point. The skip point would be
  // set as rand_skip * rand(0,1). Note that rand_skip should not be larger
  // than the number of keys in the database.
  optional uint32 rand_skip = 53 [default = 0];

  // --- Fields related to detection (det_*). ---
  // Foreground (object) overlap threshold.
  optional float det_fg_threshold = 54 [default = 0.5];
  // Background (non-object) overlap threshold.
  optional float det_bg_threshold = 55 [default = 0.5];
  // Fraction of batch that should be foreground objects.
  optional float det_fg_fraction = 56 [default = 0.25];

  // Field number 57 belonged to the now-removed field below; do not reuse it.
  // optional bool OBSOLETE_can_clobber = 57 [default = true];

  // Amount of contextual padding to add around a window
  // (used only by the window_data_layer).
  optional uint32 det_context_pad = 58 [default = 0];

  // Mode for cropping out a detection window:
  //   warp:   cropped window is warped to a fixed size and aspect ratio
  //   square: the tightest square around the window is cropped
  optional string det_crop_mode = 59 [default = "warp"];

  // For ReshapeLayer, one needs to specify the new dimensions.
  optional int32 new_num = 60 [default = 0];
  optional int32 new_channels = 61 [default = 0];
  optional int32 new_height = 62 [default = 0];
  optional int32 new_width = 63 [default = 0];

  // Whether or not ImageLayer should shuffle the list of files at every
  // epoch. It will also resize images if new_height or new_width are not
  // zero.
  optional bool shuffle_images = 64 [default = false];

  // For ConcatLayer, one needs to specify the dimension for concatenation;
  // the other dimensions must be the same for all the bottom blobs.
  // By default it will concatenate blobs along the channels dimension.
  optional uint32 concat_dim = 65 [default = 1];

  optional HDF5OutputParameter hdf5_output_param = 1001;
}
// Parametric ReLU, as described in K. He et al., "Delving Deep into
// Rectifiers: Surpassing Human-Level Performance on ImageNet Classification",
// 2015.
message PReLUParameter {
  // Initial value of a_i. Default is a_i=0.25 for all i.
  optional FillerParameter filler = 1;

  // Whether or not slope parameters are shared across channels.
  optional bool channel_shared = 2 [default = false];
}
// Specifies how to deal with connections whose weights are recovered or
// initialized as zeros. Used for fine-tuning.
enum SparseMode {
  // Keep connected.
  SPARSE_NONE = 0;
  // Disconnect all zero-weight connections during the back-propagation
  // update.
  SPARSE_UPDATE = 1;
}
// Precision selector for layers that use reduced word-width arithmetic.
enum QuantizationParameter_Precision {
  QuantizationParameter_Precision_FLOAT = 0;
  QuantizationParameter_Precision_DYNAMIC_FIXED_POINT = 1;
}
// Rounding scheme selector; used by the rounding_scheme fields of
// QuantizationParameter and NetQuantizationParameter.
enum QuantizationParameter_Rounding {
  QuantizationParameter_Rounding_NEAREST = 0;
  QuantizationParameter_Rounding_STOCHASTIC = 1;
}
// Quantization params for a layer.
message QuantizationParameter {
  optional QuantizationParameter_Precision precision = 1
      [default = QuantizationParameter_Precision_DYNAMIC_FIXED_POINT];
  optional QuantizationParameter_Rounding rounding_scheme = 2
      [default = QuantizationParameter_Rounding_NEAREST];
  optional bool power2_scale_weights = 3 [default = false];
  optional bool power2_scale_activations = 4 [default = false];

  // Dynamic fixed point params.
  message QParams {
    optional bool quantize = 1 [default = false];
    optional uint32 bitwidth = 2 [default = 8];
    optional int32 fracbits = 3 [default = 0];
    optional bool unsigned_data = 4 [default = false];
    optional bool unsigned_quant = 5 [default = false];
    optional float scale_target = 6 [default = 1.0];
    optional float scale_applied = 7 [default = 1.0];
    optional int32 shiftbits = 8 [default = 0];
    optional float offset = 9 [default = 0];
    optional float min = 10 [default = 0];
    optional float max = 11 [default = 0];
  }

  // QParams entries; judging by the field names these cover inputs (in),
  // weights (w) and outputs (out) -- NOTE(review): confirm against the
  // consuming code.
  repeated QParams qparam_in = 5;
  repeated QParams qparam_w = 6;
  repeated QParams qparam_out = 7;

  optional int32 quantized_infer_count = 8 [default = 0];
}
// Quantization params for the net.
message NetQuantizationParameter {
  optional bool quantize_weights = 1 [default = true];
  optional bool quantize_activations = 2 [default = true];

  // NOTE: 8 bits does not seem to work for classification if the previous
  // frame's range is used.
  optional uint32 bitwidth_weights = 3 [default = 12];
  optional uint32 bitwidth_activations = 4 [default = 8];
  optional uint32 bitwidth_bias = 5 [default = 16];

  // Indicates whether the quantized range is a power of 2 or not.
  // If the input is quantized with a power-of-2 range, the output quantized
  // value can be obtained by shifting with fracbits.
  optional bool power2_scale_weights = 6 [default = false];
  optional bool power2_scale_activations = 7 [default = false];

  // Apply an offset to make the quantized range unsigned and optimal.
  optional bool apply_offset_weights = 8 [default = true];
  optional bool apply_offset_activations = 9 [default = false];

  optional QuantizationParameter_Precision precision = 10
      [default = QuantizationParameter_Precision_DYNAMIC_FIXED_POINT];
  optional QuantizationParameter_Rounding rounding_scheme = 11
      [default = QuantizationParameter_Rounding_NEAREST];

  optional int32 display_quantization = 12 [default = 2000];
  optional bool update_quantization_param = 13 [default = true];

  // Frame/iteration at which quantization is introduced.
  optional int32 quantization_start = 14 [default = 1];
  optional float range_update_factor = 15 [default = 0.01];
  optional float range_expansion_factor = 16 [default = 1.25];

  repeated string ignored_layer_names = 17;
}
// Slice description made of repeated dim / stride / offset values.
// Used as the type of ImageLabelDataParameter.label_slice.
// NOTE(review): the exact meaning and ordering of the entries is not visible
// in this file -- confirm against the layer implementation.
message ImageSliceInfo {
  repeated uint32 dim = 1;
  repeated uint32 stride = 2;
  repeated uint32 offset = 3;
}
// Parameters for a data source made of an image list and a label list.
message ImageLabelDataParameter {
  enum DB {
    LEVELDB = 0;
    LMDB = 1;
  }

  // Specify the data source.
  optional string image_list_path = 1;
  optional string label_list_path = 2;

  // Specify the batch size.
  optional uint32 batch_size = 3 [default = 1];

  // rand_skip lets the data layer skip a few data points so that asynchronous
  // SGD clients do not all start at the same point. The skip point would be
  // set as rand_skip * rand(0,1). Note that rand_skip should not be larger
  // than the number of keys in the database.
  optional uint32 rand_skip = 4 [default = 0];

  // Whether or not ImageLayer should shuffle the list of files at every
  // epoch.
  optional bool shuffle = 5 [default = false];

  // Whether to randomly sample image scales.
  optional float scale_prob = 6 [default = 0];
  optional float scale_min = 7 [default = 1];
  optional float scale_max = 8 [default = 1];

  optional string data_dir = 9 [default = ""];

  optional ImageSliceInfo label_slice = 10;

  optional string image_dir = 11 [default = ""];
  optional string label_dir = 12 [default = ""];

  enum Padding {
    ZERO = 0;
    REFLECT = 1;
  }

  optional Padding padding = 13 [default = REFLECT];

  optional string image_dir2 = 14 [default = ""];
  optional string image_list_path2 = 15;

  optional uint32 threads = 16 [default = 0];
  optional uint32 size_min = 17 [default = 0];
  optional uint32 size_max = 18 [default = 0];

  optional bool check_image_files = 19 [default = false];

  optional DB backend = 20 [default = LEVELDB];
}