TRT::Tensor roi_align_inputs(TRT::DataType::Float); roi_align_inputs.resize(infer_batch_size * MAX_IMAGE_BBOX * 6); roi_align_inputs.to_cpu(false); output_array_device.to_cpu(true);
`to_cpu(true)`:将张量转到 CPU 端,同时把数据拷贝一份过来。
测试执行时间:
const int ntest =1; auto begin_timer = iLogger::timestamp_now_float(); for (int i = 0; i < ntest; ++i) boxes_array = engine->commits(images); // wait all result boxes_array.back().get(); float inference_average_time = (iLogger::timestamp_now_float() - begin_timer) / ntest / images.size(); auto type_name = FasterRCNN::type_name(type); auto mode_name = TRT::mode_string(mode); INFO("%s[%s] average: %.2f ms / image, FPS: %.2f", engine_file.c_str(), type_name, inference_average_time, 1000 / inference_average_time);
后面持续更新。