[libcamera-devel,2/2] qcam: format_converter: Add NV formats support

Message ID 20190504203354.20870-2-paul.elder@ideasonboard.com
State Superseded
Headers show
Series
  • [libcamera-devel,1/2] qcam: format_converter: Add formatFamily enum
Related show

Commit Message

Paul Elder May 4, 2019, 8:33 p.m. UTC
Add support for some NV formats:
- V4L2_PIX_FMT_NV12, V4L2_PIX_FMT_NV21
- V4L2_PIX_FMT_NV16, V4L2_PIX_FMT_NV61
- V4L2_PIX_FMT_NV24, V4L2_PIX_FMT_NV42

Signed-off-by: Paul Elder <paul.elder@ideasonboard.com>
---
 src/qcam/format_converter.cpp | 62 +++++++++++++++++++++++++++++++++++
 src/qcam/format_converter.h   |  7 ++++
 2 files changed, 69 insertions(+)

Comments

Laurent Pinchart May 4, 2019, 11:03 p.m. UTC | #1
Hi Paul,

Thank you for the patch.

On Sat, May 04, 2019 at 04:33:54PM -0400, Paul Elder wrote:
> Add support for some NV formats:
> - V4L2_PIX_FMT_NV12, V4L2_PIX_FMT_NV21
> - V4L2_PIX_FMT_NV16, V4L2_PIX_FMT_NV61
> - V4L2_PIX_FMT_NV24, V4L2_PIX_FMT_NV42
> 
> Signed-off-by: Paul Elder <paul.elder@ideasonboard.com>
> ---
>  src/qcam/format_converter.cpp | 62 +++++++++++++++++++++++++++++++++++
>  src/qcam/format_converter.h   |  7 ++++
>  2 files changed, 69 insertions(+)
> 
> diff --git a/src/qcam/format_converter.cpp b/src/qcam/format_converter.cpp
> index 192767c..ca91c7e 100644
> --- a/src/qcam/format_converter.cpp
> +++ b/src/qcam/format_converter.cpp
> @@ -72,6 +72,42 @@ int FormatConverter::configure(unsigned int format, unsigned int width,
>  		y_pos_ = 0;
>  		cb_pos_ = 1;
>  		break;
> +	case V4L2_PIX_FMT_NV12:
> +		formatFamily_ = NV;
> +		xDownSample_ = 2;
> +		yDownSample_ = 2;

The usual term is sub-sampling, not down-sampling. It is also customary
to talk about horizontal and vertical sub-sampling instead of x and y.

> +		nvSwap_ = false;
> +		break;
> +	case V4L2_PIX_FMT_NV21:
> +		formatFamily_ = NV;
> +		xDownSample_ = 2;
> +		yDownSample_ = 2;
> +		nvSwap_ = true;
> +		break;
> +	case V4L2_PIX_FMT_NV16:
> +		formatFamily_ = NV;
> +		xDownSample_ = 2;
> +		yDownSample_ = 1;
> +		nvSwap_ = false;
> +		break;
> +	case V4L2_PIX_FMT_NV61:
> +		formatFamily_ = NV;
> +		xDownSample_ = 2;
> +		yDownSample_ = 1;
> +		nvSwap_ = true;
> +		break;
> +	case V4L2_PIX_FMT_NV24:
> +		formatFamily_ = NV;
> +		xDownSample_ = 1;
> +		yDownSample_ = 1;
> +		nvSwap_ = false;
> +		break;
> +	case V4L2_PIX_FMT_NV42:
> +		formatFamily_ = NV;
> +		xDownSample_ = 1;
> +		yDownSample_ = 1;
> +		nvSwap_ = true;
> +		break;
>  	case V4L2_PIX_FMT_MJPEG:
>  		formatFamily_ = MJPEG;
>  		break;
> @@ -99,6 +135,9 @@ void FormatConverter::convert(const unsigned char *src, size_t size,
>  	case RGB:
>  		convertRGB(src, dst->bits());
>  		break;
> +	case NV:
> +		convertNV(src, dst->bits());
> +		break;
>  	};
>  }
>  
> @@ -171,3 +210,26 @@ void FormatConverter::convertYUV(const unsigned char *src, unsigned char *dst)
>  		}
>  	}
>  }
> +
> +void FormatConverter::convertNV(const unsigned char *src, unsigned char *dst)

We try to define functions in the same order they are declared in the
class, at least within an access type (public, protected and private).
That is, for example, public and private functions can be interleaved in
the .cpp file, but public functions should appear in the same order as
in the .h file, and private functions as well.

> +{
> +	unsigned int i, j;
> +	int r, g, b, y, cr, cb, loc;
> +
> +	for (j = 0; j < height_; j++) {
> +		for (i = 0; i < width_; i++) {

It would be nice to use x and y instead of i and j, but y is already
taken for luma. 

> +			y = src[j * width_ + i];
> +			loc = height_ * width_ +
> +			      (j / yDownSample_) * width_ * 2 / xDownSample_ +
> +			      (i & -xDownSample_) * 2 / xDownSample_;
> +			cb = nvSwap_ ? src[loc + 1] : src[loc];
> +			cr = nvSwap_ ? src[loc] : src[loc + 1];
> +
> +			yuv_to_rgb(y, cb, cr, &r, &g, &b);
> +			dst[j * width_ * 4 + i * 4 + 0] = b;
> +			dst[j * width_ * 4 + i * 4 + 1] = g;
> +			dst[j * width_ * 4 + i * 4 + 2] = r;
> +			dst[j * width_ * 4 + i * 4 + 3] = 0xff;

You could increase the dst pointer instead, as in

			dst[0] = b;
			dst[1] = g;
 			dst[2] = r;
			dst[3] = 0xff;
			dst += 4;

I think this would be more efficient. Another improvement would be to
compute unroll the inner loop slightly by computing two dst pixels one after the
other. This should allow you to optimise the body of the inner loop by
reducing the calculation performed for each iteration. The following
code is completely untested but gives you an idea of what I mean.

	unsigned int c_stride = width_ * (2 / xDownSampling_);
	unsigned int c_increment = xDownSampling_ ? 1 : 0,
	int r, g, b;

	for (unsigned int y = 0; y < height_; y++) {
		unsigned char *src_y = src + y * width_;
		unsigned char *src_cb = src + (y / yDownSample_) * c_stride,
		unsigned char *src_cr = src + (y / yDownSample_) * c_stride + 1;

		if (nvSwap_)
			std::swap(src_cb, src_cr);

		for (unsigned int x = 0; i x width_; x += 2) {
			yuv_to_rgb(*src_y, *src_cb, *src_cr, &r, &g, &b);
			dst[0] = b;
			dst[1] = g;
 			dst[2] = r;
			dst[3] = 0xff;
			src_y++;
			src_cb += c_increment;
			src_cr += c_increment;
			dst += 4;

			yuv_to_rgb(*src_y, *src_cb, *src_cr, &r, &g, &b);
			dst[0] = b;
			dst[1] = g;
 			dst[2] = r;
			dst[3] = 0xff;
			src_y++;
			src_cb++;
			src_cr++;
			dst += 4;
		}
	}

We could go one step further and compute four pixels in the inner loop,
two horizontally and two vertically, possible saving a bit more
calculation, but then we would lose cache locality, so I think the
result would be slower.

> +		}
> +	}
> +}
> diff --git a/src/qcam/format_converter.h b/src/qcam/format_converter.h
> index 9820982..48af0fc 100644
> --- a/src/qcam/format_converter.h
> +++ b/src/qcam/format_converter.h
> @@ -16,6 +16,7 @@ class FormatConverter
>  public:
>  	enum formatFamily {
>  		MJPEG,
> +		NV,
>  		RGB,
>  		YUV,
>  	};
> @@ -26,6 +27,7 @@ public:
>  	void convert(const unsigned char *src, size_t size, QImage *dst);
>  
>  private:
> +	void convertNV(const unsigned char *src, unsigned char *dst);
>  	void convertRGB(const unsigned char *src, unsigned char *dst);
>  	void convertYUV(const unsigned char *src, unsigned char *dst);
>  
> @@ -35,6 +37,11 @@ private:
>  
>  	enum formatFamily formatFamily_;
>  
> +	/* NV parameters */
> +	unsigned int xDownSample_;
> +	unsigned int yDownSample_;
> +	bool nvSwap_;
> +
>  	/* RGB parameters */
>  	unsigned int bpp_;
>  	unsigned int r_pos_;

Patch

diff --git a/src/qcam/format_converter.cpp b/src/qcam/format_converter.cpp
index 192767c..ca91c7e 100644
--- a/src/qcam/format_converter.cpp
+++ b/src/qcam/format_converter.cpp
@@ -72,6 +72,42 @@  int FormatConverter::configure(unsigned int format, unsigned int width,
 		y_pos_ = 0;
 		cb_pos_ = 1;
 		break;
+	case V4L2_PIX_FMT_NV12:
+		formatFamily_ = NV;
+		xDownSample_ = 2;
+		yDownSample_ = 2;
+		nvSwap_ = false;
+		break;
+	case V4L2_PIX_FMT_NV21:
+		formatFamily_ = NV;
+		xDownSample_ = 2;
+		yDownSample_ = 2;
+		nvSwap_ = true;
+		break;
+	case V4L2_PIX_FMT_NV16:
+		formatFamily_ = NV;
+		xDownSample_ = 2;
+		yDownSample_ = 1;
+		nvSwap_ = false;
+		break;
+	case V4L2_PIX_FMT_NV61:
+		formatFamily_ = NV;
+		xDownSample_ = 2;
+		yDownSample_ = 1;
+		nvSwap_ = true;
+		break;
+	case V4L2_PIX_FMT_NV24:
+		formatFamily_ = NV;
+		xDownSample_ = 1;
+		yDownSample_ = 1;
+		nvSwap_ = false;
+		break;
+	case V4L2_PIX_FMT_NV42:
+		formatFamily_ = NV;
+		xDownSample_ = 1;
+		yDownSample_ = 1;
+		nvSwap_ = true;
+		break;
 	case V4L2_PIX_FMT_MJPEG:
 		formatFamily_ = MJPEG;
 		break;
@@ -99,6 +135,9 @@  void FormatConverter::convert(const unsigned char *src, size_t size,
 	case RGB:
 		convertRGB(src, dst->bits());
 		break;
+	case NV:
+		convertNV(src, dst->bits());
+		break;
 	};
 }
 
@@ -171,3 +210,26 @@  void FormatConverter::convertYUV(const unsigned char *src, unsigned char *dst)
 		}
 	}
 }
+
+void FormatConverter::convertNV(const unsigned char *src, unsigned char *dst)
+{
+	unsigned int i, j;
+	int r, g, b, y, cr, cb, loc;
+
+	for (j = 0; j < height_; j++) {
+		for (i = 0; i < width_; i++) {
+			y = src[j * width_ + i];
+			loc = height_ * width_ +
+			      (j / yDownSample_) * width_ * 2 / xDownSample_ +
+			      (i & -xDownSample_) * 2 / xDownSample_;
+			cb = nvSwap_ ? src[loc + 1] : src[loc];
+			cr = nvSwap_ ? src[loc] : src[loc + 1];
+
+			yuv_to_rgb(y, cb, cr, &r, &g, &b);
+			dst[j * width_ * 4 + i * 4 + 0] = b;
+			dst[j * width_ * 4 + i * 4 + 1] = g;
+			dst[j * width_ * 4 + i * 4 + 2] = r;
+			dst[j * width_ * 4 + i * 4 + 3] = 0xff;
+		}
+	}
+}
diff --git a/src/qcam/format_converter.h b/src/qcam/format_converter.h
index 9820982..48af0fc 100644
--- a/src/qcam/format_converter.h
+++ b/src/qcam/format_converter.h
@@ -16,6 +16,7 @@  class FormatConverter
 public:
 	enum formatFamily {
 		MJPEG,
+		NV,
 		RGB,
 		YUV,
 	};
@@ -26,6 +27,7 @@  public:
 	void convert(const unsigned char *src, size_t size, QImage *dst);
 
 private:
+	void convertNV(const unsigned char *src, unsigned char *dst);
 	void convertRGB(const unsigned char *src, unsigned char *dst);
 	void convertYUV(const unsigned char *src, unsigned char *dst);
 
@@ -35,6 +37,11 @@  private:
 
 	enum formatFamily formatFamily_;
 
+	/* NV parameters */
+	unsigned int xDownSample_;
+	unsigned int yDownSample_;
+	bool nvSwap_;
+
 	/* RGB parameters */
 	unsigned int bpp_;
 	unsigned int r_pos_;