summary | shortlog | log | commit | commitdiff | tree
raw | patch | inline | side by side (parent: f6e0c49)
raw | patch | inline | side by side (parent: f6e0c49)
author | Yuan Zhao <yuanzhao@ti.com> | |
Mon, 25 Nov 2019 01:58:14 +0000 (19:58 -0600) | ||
committer | Yuan Zhao <yuanzhao@ti.com> | |
Mon, 25 Nov 2019 04:20:25 +0000 (22:20 -0600) |
- If a Layer2Group map exists in the subgraph config file, use it.
Otherwise, try to derive the map from the network layer types.
- Added TidlFreeSubgraph() for subgraph resource de-allocation
- Code changes based on review comments.
- MCT-1223
Otherwise, try to derive the map from the network layer types.
- Added TidlFreeSubgraph() for subgraph resource de-allocation
- Code changes based on review comments.
- MCT-1223
index 8a77f6576eed068ecfc3b20d506c476b26ac3aa7..2233d9669c254244e78963bd73cad8dc845f9e38 100644 (file)
#define NUM_DEFAULT_INPUTS 1
#define DEFAULT_OBJECT_CLASSES_LIST_FILE "imagenet_objects.json"
#define DEFAULT_OUTPUT_PROB_THRESHOLD 5
+#define MOBILENET_IN_C (3)
+#define MOBILENET_IN_H (224)
+#define MOBILENET_IN_W (224)
+#define MOBILENET_INPUT_SIZE (1*MOBILENET_IN_C*MOBILENET_IN_H*MOBILENET_IN_W)
+#define MOBILENET_OUTPUT_SIZE (1001)
const char *default_inputs[NUM_DEFAULT_INPUTS] =
{
"../test/testvecs/input/objects/cat-pet-animal-domestic-104827.jpeg"
{
TidlInitSubgraph(1, 0);
float **inputs = new float *[1];
- inputs[0] = new float[1*3*224*224];
+ inputs[0] = new float[MOBILENET_INPUT_SIZE];
float **outputs = new float *[1];
- outputs[0] = new float[1001];
+ outputs[0] = new float[MOBILENET_OUTPUT_SIZE];
for (int i = 0; i < 5; i ++)
{
float **outputs = new float *[batch_size];
for (int i = 0; i < batch_size; i++)
{
- inputs[i] = new float[1*3*224*224];
- outputs[i] = new float[1001];
+ inputs[i] = new float[MOBILENET_INPUT_SIZE];
+ outputs[i] = new float[MOBILENET_OUTPUT_SIZE];
}
chrono::time_point<chrono::steady_clock> tloop0, tloop1;
float **outputs = new float *[num_threads];
for (int i = 0; i < num_threads; i++)
{
- inputs[i] = new float[1*3*224*224];
- outputs[i] = new float[1001];
+ inputs[i] = new float[MOBILENET_INPUT_SIZE];
+ outputs[i] = new float[MOBILENET_OUTPUT_SIZE];
}
vector<future<bool>> futures(num_threads);
bool skip_outputs = false;
vector<UserData> v_data(num_threads);
for (int i = 0; i < num_threads; i++)
{
- inputs[i] = new float[1*3*224*224];
- outputs[i] = new float[1001];
+ inputs[i] = new float[MOBILENET_INPUT_SIZE];
+ outputs[i] = new float[MOBILENET_OUTPUT_SIZE];
v_data[i].inputs = &inputs[i];
v_data[i].outputs = &outputs[i];
}
float **outputs = new float *[num_threads * batch_size];
for (int i = 0; i < num_threads * batch_size; i++)
{
- inputs[i] = new float[1*3*224*224];
- outputs[i] = new float[1001];
+ inputs[i] = new float[MOBILENET_INPUT_SIZE];
+ outputs[i] = new float[MOBILENET_OUTPUT_SIZE];
}
vector<future<bool>> futures(num_threads);
bool skip_outputs = false;
int batch_size)
{
Configuration c;
- c.inNumChannels = 3;;
- c.inWidth = 224;
- c.inHeight = 224;
+ c.inNumChannels = MOBILENET_IN_C;
+ c.inWidth = MOBILENET_IN_W;
+ c.inHeight = MOBILENET_IN_H;
c.preProcType = 2;
- SubgraphDataConv in_conv{{0}, {true}, {128.0f}, {false}, {1,3,224,224}};
+ SubgraphDataConv in_conv{{0}, {true}, {128.0f}, {false},
+ {1,MOBILENET_IN_C,MOBILENET_IN_H,MOBILENET_IN_W}};
- char* frame_buffer = new char[3*224*224];
+ char* frame_buffer = new char[MOBILENET_INPUT_SIZE];
assert (frame_buffer != nullptr);
Mat image;
index 65db5b5dc2e1c14dc77a039080b172d0bb66a1d8..c75d6b2b9461e74bafac13c14af7f0886fe29570 100644 (file)
int subgraph_id
);
+//! @brief Top level API to free a TIDL subgraph on device
+//! @param total_subgraphs total number of TIDL subgraphs in whole inference
+//! @param subgraph_id index of current TIDL subgraph
+extern void TidlFreeSubgraph(int total_subgraphs,
+ int subgraph_id
+ );
+
//! @brief Top level inference to run a TIDL subgraph
//! @param total_subgraphs total number of TIDL subgraphs in whole inference
//! @param subgraph_id index of current TIDL subgraph
index 24b378e241b4d6b7ac5e881aa35969bbaf6c22d5..9d068b2db7c9ab1cc6a0b41aba746cf89cd0a6de 100644 (file)
res.InitSubgraph(subgraph_id);
}
+void TidlFreeSubgraph(int total_subgraphs, int subgraph_id)
+{
+ ResM& res = ResM::Instance(total_subgraphs);
+ res.FreeSubgraph(subgraph_id);
+}
void TidlRunSubgraph(int total_subgraphs,
int subgraph_id,
ResM::~ResM()
{
+ for (uint32_t i = 0; i < num_subgraphs_m; i++)
+ FreeSubgraph(i);
+
+ delete eops_m;
+ eops_m = nullptr;
+}
+
+void ResM::FreeSubgraph(uint32_t subgraph_id)
+{
+ assert(subgraph_id < num_subgraphs_m);
+
if (eops_m != nullptr)
{
- for (const ResEOP& res_eop : *eops_m)
+ ResEOP& res_eop = (*eops_m)[subgraph_id];
+ if (res_eop.eops != nullptr)
{
- if (res_eop.eops != nullptr)
+ for (const ExecutionObjectPipeline* eop : *(res_eop.eops))
{
- for (const ExecutionObjectPipeline* eop : *(res_eop.eops))
- {
- free(eop->GetInputBufferPtr());
- free(eop->GetOutputBufferPtr());
- delete eop;
- }
+ free(eop->GetInputBufferPtr());
+ free(eop->GetOutputBufferPtr());
+ delete eop;
}
+ delete res_eop.eops;
+ res_eop.eops = nullptr;
}
- delete eops_m;
- eops_m = nullptr;
}
- for (const Executor* e : es_m)
- if (e != nullptr) delete e;
- for (const Executor* e : e2s_m)
- if (e != nullptr) delete e;
- for (SubgraphDataConv *dc : in_conv_m)
- if (dc != nullptr) delete dc;
- for (SubgraphDataConv *dc : out_conv_m)
- if (dc != nullptr) delete dc;
+ delete es_m[subgraph_id];
+ es_m[subgraph_id] = nullptr;
+
+ delete e2s_m[subgraph_id];
+ e2s_m[subgraph_id] = nullptr;
+
+ delete in_conv_m[subgraph_id];
+ in_conv_m[subgraph_id] = nullptr;
+
+ delete out_conv_m[subgraph_id];
+ out_conv_m[subgraph_id] = nullptr;
}
ResM& ResM::Instance(uint32_t total_num_subgraphs)
// uint32_t num_dsps_used = 0;
if (num_eves_m > 0 && num_dsps_m > 0 && ! cs_m[subgraph_id].runFullNet)
{
- int32_t start_layer = net->numLayers -1;
- int32_t end_layer = 0;
- if (net->TIDLLayers[start_layer].layerType == (int32_t) TIDL_DataLayer)
- start_layer -= 1;
- if (net->TIDLLayers[end_layer].layerType == (int32_t) TIDL_DataLayer)
- end_layer += 1;
- int32_t i = start_layer;
- for ( ; i > end_layer; i--)
+ if (cs_m[subgraph_id].layerIndex2LayerGroupId.empty())
{
- int32_t layer_type = net->TIDLLayers[i].layerType;
- if (layer_type != (int32_t) TIDL_SoftMaxLayer &&
- layer_type != (int32_t) TIDL_InnerProductLayer &&
- layer_type != (int32_t) TIDL_PoolingLayer)
- break;
- }
- i += 1;
- if (i <= start_layer)
- {
- if (num_lg2_dsps_used_m < num_dsps_m)
+ int32_t start_layer = net->numLayers -1;
+ int32_t end_layer = 0;
+ if (net->TIDLLayers[start_layer].layerType == (int32_t) TIDL_DataLayer)
+ start_layer -= 1;
+ if (net->TIDLLayers[end_layer].layerType == (int32_t) TIDL_DataLayer)
+ end_layer += 1;
+ int32_t i = start_layer;
+ for ( ; i > end_layer; i--)
+ {
+ int32_t layer_type = net->TIDLLayers[i].layerType;
+ if (layer_type != (int32_t) TIDL_SoftMaxLayer &&
+ layer_type != (int32_t) TIDL_InnerProductLayer &&
+ layer_type != (int32_t) TIDL_PoolingLayer)
+ break;
+ }
+ i += 1;
+ if (i <= start_layer)
{
- if (enable_trace_m)
- printf("Subgraph %d: assign layers %d to %d to group 2 for DSP\n",
- subgraph_id, i, start_layer);
- while (i <= start_layer)
- cs_m[subgraph_id].layerIndex2LayerGroupId[i++] = 2;
- e2_ids.insert(static_cast<DeviceId>(num_lg2_dsps_used_m));
- num_lg2_dsps_used_m += 1;
- if (num_subgraphs_m == 1) // Allocate all dsps if only one subgraph
+ if (num_lg2_dsps_used_m < num_dsps_m)
{
- while (num_lg2_dsps_used_m < num_dsps_m)
- e2_ids.insert(static_cast<DeviceId>(num_lg2_dsps_used_m++));
+ if (enable_trace_m)
+ printf("Subgraph %d: assign layers %d to %d to group 2 for DSP\n",
+ subgraph_id, i, start_layer);
+ while (i <= start_layer)
+ cs_m[subgraph_id].layerIndex2LayerGroupId[i++] = 2;
}
}
}
- delete net;
+ else
+ {
+ if (enable_trace_m)
+ printf("Subgraph %d: using layer2group map in config file for DSP\n",
+ subgraph_id);
+ }
+
+ if (! cs_m[subgraph_id].layerIndex2LayerGroupId.empty())
+ {
+ e2_ids.insert(static_cast<DeviceId>(num_lg2_dsps_used_m));
+ num_lg2_dsps_used_m += 1;
+ if (num_subgraphs_m == 1) // Allocate all dsps if only one subgraph
+ {
+ while (num_lg2_dsps_used_m < num_dsps_m)
+ e2_ids.insert(static_cast<DeviceId>(num_lg2_dsps_used_m++));
+ }
+ }
}
+ delete net;
if (e2_ids.empty())
cs_m[subgraph_id].runFullNet = true;
index 9738dbbac7cbf65671d263880043050582c4926b..54dc12dd2837cfbdbf20ae80d4a70aa23574cd7b 100644 (file)
// how to get resources for subgraph_id
void InitSubgraph(uint32_t subgraph_id);
+ void FreeSubgraph(uint32_t subgraph_id);
uint32_t GetNumEOPs(uint32_t subgraph_id);
ExecutionObjectPipeline* GetEOP(uint32_t subgraph_id);
void FreeEOP(uint32_t subgraph_id,