postprocessing: hailo: Remove hardcoded input tensor sizes

Remove all hard-coded tensor size consts from the Hailo postprocessing stages. Add a HailoPostProcessingStage::InputTensorSize() helper to fetch the input tensor size supplied in the JSON file instead. Signed-off-by: Naushir Patuck <[email protected]>
raspberrypi · Jul 18, 2024 · 560f384 · 560f384
1 parent 11390c3
commit 560f384
Show file tree

Hide file tree

Showing 6 changed files with 34 additions and 43 deletions.
diff --git a/post_processing_stages/hailo/hailo_classifier.cpp b/post_processing_stages/hailo/hailo_classifier.cpp
@@ -26,8 +26,6 @@ using PostProcFuncPtr = void (*)(HailoROIPtr);
 #define NAME "hailo_classifier"
 #define POSTPROC_LIB "libclassification.so"
 
-static constexpr Size INPUT_TENSOR_SIZE { 224, 224 };
-
 class HailoClassifier : public HailoPostProcessingStage
 {
 public:
@@ -90,8 +88,8 @@ bool HailoClassifier::Process(CompletedRequestPtr &completed_request)
 	if (low_res_info_.pixel_format == libcamera::formats::YUV420)
 	{
 		StreamInfo rgb_info;
-		rgb_info.width = INPUT_TENSOR_SIZE.width;
-		rgb_info.height = INPUT_TENSOR_SIZE.height;
+		rgb_info.width = InputTensorSize().width;
+		rgb_info.height = InputTensorSize().height;
 		rgb_info.stride = rgb_info.width * 3;
 
 		input = allocator_.Allocate(rgb_info.stride * rgb_info.height);

diff --git a/post_processing_stages/hailo/hailo_postprocessing_stage.cpp b/post_processing_stages/hailo/hailo_postprocessing_stage.cpp
@@ -145,6 +145,9 @@ int HailoPostProcessingStage::configureHailoRT()
 	}
 	bindings_ = std::move(bindings_exp.release());
 
+	hailo_3d_image_shape_t shape = infer_model_->inputs()[0].shape();
+	input_tensor_size_ = libcamera::Size(shape.width, shape.height);
+
 	return 0;
 }
 

diff --git a/post_processing_stages/hailo/hailo_postprocessing_stage.hpp b/post_processing_stages/hailo/hailo_postprocessing_stage.hpp
@@ -102,6 +102,11 @@ class HailoPostProcessingStage : public PostProcessingStage
 		return std::string(HAILO_POSTPROC_LIB_DIR) + "/" + lib;
 	}
 
+	const libcamera::Size &InputTensorSize() const
+	{
+		return input_tensor_size_;
+	}
+
 	hailo_status DispatchJob(const uint8_t *input, hailort::AsyncInferJob &job, std::vector<OutTensor> &output_tensors);
 	HailoROIPtr MakeROI(const std::vector<OutTensor> &output_tensors) const;
 
@@ -125,4 +130,5 @@ class HailoPostProcessingStage : public PostProcessingStage
 	std::string hef_file_;
 	hailort::ConfiguredInferModel::Bindings bindings_;
 	std::chrono::time_point<std::chrono::steady_clock> last_frame_;
+	libcamera::Size input_tensor_size_;
 };
diff --git a/post_processing_stages/hailo/hailo_yolo_inference.cpp b/post_processing_stages/hailo/hailo_yolo_inference.cpp
@@ -34,13 +34,6 @@ namespace fs = std::filesystem;
 #define POSTPROC_LIB "libyolo_post.so"
 #define POSTPROC_LIB_NMS "libyolo_hailortpp_post.so"
 
-namespace
-{
-
-constexpr Size INPUT_TENSOR_SIZE { 640, 640 };
-
-} // namespace
-
 class YoloInference : public HailoPostProcessingStage
 {
 public:
@@ -147,9 +140,9 @@ bool YoloInference::Process(CompletedRequestPtr &completed_request)
 		return false;
 	}
 
-	if (low_res_info_.width != INPUT_TENSOR_SIZE.width || low_res_info_.height != INPUT_TENSOR_SIZE.height)
+	if (low_res_info_.width != InputTensorSize().width || low_res_info_.height != InputTensorSize().height)
 	{
-		LOG_ERROR("Wrong low res size, expecting " << INPUT_TENSOR_SIZE.toString());
+		LOG_ERROR("Wrong low res size, expecting " << InputTensorSize().toString());
 		return false;
 	}
 
@@ -161,8 +154,8 @@ bool YoloInference::Process(CompletedRequestPtr &completed_request)
 	if (low_res_info_.pixel_format == libcamera::formats::YUV420)
 	{
 		StreamInfo rgb_info;
-		rgb_info.width = INPUT_TENSOR_SIZE.width;
-		rgb_info.height = INPUT_TENSOR_SIZE.height;
+		rgb_info.width = InputTensorSize().width;
+		rgb_info.height = InputTensorSize().height;
 		rgb_info.stride = rgb_info.width * 3;
 
 		input = allocator_.Allocate(rgb_info.stride * rgb_info.height);

diff --git a/post_processing_stages/hailo/hailo_yolov5_segmentation.cpp b/post_processing_stages/hailo/hailo_yolov5_segmentation.cpp
@@ -38,8 +38,6 @@ namespace fs = std::filesystem;
 namespace
 {
 
-constexpr Size INPUT_TENSOR_SIZE { 640, 640 };
-
 template <typename T>
 class MessageQueue
 {
@@ -305,9 +303,9 @@ bool YoloSegmentation::Process(CompletedRequestPtr &completed_request)
 		return false;
 	}
 
-	if (low_res_info_.width != INPUT_TENSOR_SIZE.width || low_res_info_.height != INPUT_TENSOR_SIZE.height)
+	if (low_res_info_.width != InputTensorSize().width || low_res_info_.height != InputTensorSize().height)
 	{
-		LOG_ERROR("Wrong low res size, expecting " << INPUT_TENSOR_SIZE.toString());
+		LOG_ERROR("Wrong low res size, expecting " << InputTensorSize().toString());
 		return false;
 	}
 
@@ -318,8 +316,8 @@ bool YoloSegmentation::Process(CompletedRequestPtr &completed_request)
 	if (low_res_info_.pixel_format == libcamera::formats::YUV420)
 	{
 		StreamInfo rgb_info;
-		rgb_info.width = INPUT_TENSOR_SIZE.width;
-		rgb_info.height = INPUT_TENSOR_SIZE.height;
+		rgb_info.width = InputTensorSize().width;
+		rgb_info.height = InputTensorSize().height;
 		rgb_info.stride = rgb_info.width * 3;
 
 		input = allocator_.Allocate(rgb_info.stride * rgb_info.height);
@@ -390,19 +388,19 @@ bool YoloSegmentation::runInference(uint8_t *input, uint32_t *output)
 	filter(roi, yolo_params_);
 
 	// RGB -> BGR for cv::imshow
-	for (unsigned int j = 0; j < INPUT_TENSOR_SIZE.height; j++)
+	for (unsigned int j = 0; j < InputTensorSize().height; j++)
 	{
-		uint8_t *ptr = input + j * INPUT_TENSOR_SIZE.width * 3;
-		for (unsigned int i = 0; i < INPUT_TENSOR_SIZE.width; i++)
+		uint8_t *ptr = input + j * InputTensorSize().width * 3;
+		for (unsigned int i = 0; i < InputTensorSize().width; i++)
 		{
 			const uint8_t t = ptr[0];
 			ptr[0] = ptr[2], ptr[2] = t;
 			ptr += 3;
 		}
 	}
 
-	cv::Mat image(INPUT_TENSOR_SIZE.height, INPUT_TENSOR_SIZE.width, CV_8UC3, (void *)input,
-				  INPUT_TENSOR_SIZE.width * 3);
+	cv::Mat image(InputTensorSize().height, InputTensorSize().width, CV_8UC3, (void *)input,
+				  InputTensorSize().width * 3);
 
 	std::vector<HailoDetectionPtr> detections = hailo_common::get_hailo_detections(roi);
 	for (auto &detection : detections)
@@ -412,10 +410,10 @@ bool YoloSegmentation::runInference(uint8_t *input, uint32_t *output)
 
 		auto bbox = detection->get_bbox();
 
-		cv::rectangle(image, cv::Point2f(bbox.xmin() * float(INPUT_TENSOR_SIZE.width),
-										 bbox.ymin() * float(INPUT_TENSOR_SIZE.height)),
-							 cv::Point2f(bbox.xmax() * float(INPUT_TENSOR_SIZE.width),
-										 bbox.ymax() * float(INPUT_TENSOR_SIZE.height)),
+		cv::rectangle(image, cv::Point2f(bbox.xmin() * float(InputTensorSize().width),
+										 bbox.ymin() * float(InputTensorSize().height)),
+							 cv::Point2f(bbox.xmax() * float(InputTensorSize().width),
+										 bbox.ymax() * float(InputTensorSize().height)),
 					  cv::Scalar(0, 0, 255), 1);
 
 		draw_all(image, detection, 0);
@@ -439,8 +437,8 @@ void YoloSegmentation::displayThread()
 		{
 			current_image = std::move(msg.payload);
 
-			cv::Mat image(INPUT_TENSOR_SIZE.height, INPUT_TENSOR_SIZE.width, CV_8UC3, (void *)current_image.get(),
-						  INPUT_TENSOR_SIZE.width * 3);
+			cv::Mat image(InputTensorSize().height, InputTensorSize().width, CV_8UC3, (void *)current_image.get(),
+						  InputTensorSize().width * 3);
 
 			cv::imshow("Segmentation Results", image);
 			cv::waitKey(1);

diff --git a/post_processing_stages/hailo/hailo_yolov8_pose.cpp b/post_processing_stages/hailo/hailo_yolov8_pose.cpp
@@ -24,13 +24,6 @@ using PostProcFuncPtr = std::pair<std::vector<KeyPt>, std::vector<PairPairs>>(*)
 #define NAME "hailo_yolo_pose"
 #define POSTPROC_LIB "libyolov8pose_post.so"
 
-namespace
-{
-
-constexpr Size INPUT_TENSOR_SIZE { 640, 640 };
-
-} // namespace
-
 class YoloPose : public HailoPostProcessingStage
 {
 public:
@@ -78,9 +71,9 @@ bool YoloPose::Process(CompletedRequestPtr &completed_request)
 		return false;
 	}
 
-	if (low_res_info_.width != INPUT_TENSOR_SIZE.width || low_res_info_.height != INPUT_TENSOR_SIZE.height)
+	if (low_res_info_.width != InputTensorSize().width || low_res_info_.height != InputTensorSize().height)
 	{
-		LOG_ERROR("Wrong low res size, expecting " << INPUT_TENSOR_SIZE.toString());
+		LOG_ERROR("Wrong low res size, expecting " << InputTensorSize().toString());
 		return false;
 	}
 
@@ -92,8 +85,8 @@ bool YoloPose::Process(CompletedRequestPtr &completed_request)
 	if (low_res_info_.pixel_format == libcamera::formats::YUV420)
 	{
 		StreamInfo rgb_info;
-		rgb_info.width = INPUT_TENSOR_SIZE.width;
-		rgb_info.height = INPUT_TENSOR_SIZE.height;
+		rgb_info.width = InputTensorSize().width;
+		rgb_info.height = InputTensorSize().height;
 		rgb_info.stride = rgb_info.width * 3;
 
 		input = allocator_.Allocate(rgb_info.stride * rgb_info.height);