[libcamera-devel,RFC,v3,4/5] libcamera: converter: add software converter

Message ID: 20230928185537.20178-5-andrey.konovalov@linaro.org
State: New
Series: libcamera: converter: generalize Converter to remove MediaDevice dependency

Commit Message

Andrey Konovalov Sept. 28, 2023, 6:55 p.m. UTC
Use the Converter interface to implement bilinear Bayer demosaic
filtering and very simple AWB on CPU.

The only output format currently supported is RGB888.

Signed-off-by: Andrey Konovalov <andrey.konovalov@linaro.org>
---
 .../internal/converter/converter_softw.h      | 100 ++++
 .../libcamera/internal/converter/meson.build  |   1 +
 src/libcamera/converter/converter_softw.cpp   | 445 ++++++++++++++++++
 src/libcamera/converter/meson.build           |   3 +-
 4 files changed, 548 insertions(+), 1 deletion(-)
 create mode 100644 include/libcamera/internal/converter/converter_softw.h
 create mode 100644 src/libcamera/converter/converter_softw.cpp

Comments

Laurent Pinchart Sept. 30, 2023, 9:59 a.m. UTC | #1
Hi Andrey,

Thank you for the patch. I haven't reviewed everything in detail as
there are parts that I expect will change, but here are a few comments
already.

On Thu, Sep 28, 2023 at 09:55:36PM +0300, Andrey Konovalov via libcamera-devel wrote:
> Use the Converter interface to implement bilinear Bayer demosaic
> filtering and very simple AWB on CPU.
> 
> The only output format currently supported is RGB888.
> 
> Signed-off-by: Andrey Konovalov <andrey.konovalov@linaro.org>
> ---
>  .../internal/converter/converter_softw.h      | 100 ++++
>  .../libcamera/internal/converter/meson.build  |   1 +
>  src/libcamera/converter/converter_softw.cpp   | 445 ++++++++++++++++++
>  src/libcamera/converter/meson.build           |   3 +-
>  4 files changed, 548 insertions(+), 1 deletion(-)
>  create mode 100644 include/libcamera/internal/converter/converter_softw.h
>  create mode 100644 src/libcamera/converter/converter_softw.cpp
> 
> diff --git a/include/libcamera/internal/converter/converter_softw.h b/include/libcamera/internal/converter/converter_softw.h
> new file mode 100644
> index 00000000..daa2d6da
> --- /dev/null
> +++ b/include/libcamera/internal/converter/converter_softw.h
> @@ -0,0 +1,100 @@
> +/* SPDX-License-Identifier: LGPL-2.1-or-later */
> +/*
> + * Copyright (C) 2023, Linaro Ltd
> + *
> + * converter_softw.h - interface of software converter (runs 100% on CPU)
> + */
> +
> +#pragma once
> +
> +#include <functional>
> +#include <map>
> +#include <memory>
> +#include <string>
> +#include <tuple>
> +#include <vector>
> +
> +#include <libcamera/base/log.h>
> +#include <libcamera/base/mutex.h>
> +#include <libcamera/base/signal.h>
> +#include <libcamera/base/thread.h>
> +
> +#include <libcamera/pixel_format.h>
> +
> +#include "libcamera/internal/converter.h"
> +
> +namespace libcamera {
> +
> +class FrameBuffer;
> +class MediaDevice;
> +class Size;
> +class SizeRange;
> +struct StreamConfiguration;
> +
> +class SwConverter : public Converter
> +{
> +public:
> +	SwConverter(MediaDevice *media);
> +
> +	int loadConfiguration([[maybe_unused]] const std::string &filename) { return 0; }
> +	bool isValid() const { return isp_ != nullptr; }
> +
> +	std::vector<PixelFormat> formats(PixelFormat input);
> +	SizeRange sizes(const Size &input);
> +
> +	std::tuple<unsigned int, unsigned int>
> +	strideAndFrameSize(const PixelFormat &pixelFormat, const Size &size);
> +
> +	int configure(const StreamConfiguration &inputCfg,
> +		      const std::vector<std::reference_wrapper<StreamConfiguration>> &outputCfg);
> +	int exportBuffers(unsigned int output, unsigned int count,
> +			  std::vector<std::unique_ptr<FrameBuffer>> *buffers);
> +
> +	void process(FrameBuffer *input, FrameBuffer *output);
> +	int start();
> +	void stop();
> +
> +	int queueBuffers(FrameBuffer *input,
> +			 const std::map<unsigned int, FrameBuffer *> &outputs);
> +
> +private:
> +	class Isp : public Object
> +	{
> +	public:
> +		Isp(SwConverter *converter);
> +		~Isp();
> +
> +		int configure(const StreamConfiguration &inputCfg,
> +			      const StreamConfiguration &outputCfg);
> +		int exportBuffers(unsigned int count,
> +				  std::vector<std::unique_ptr<FrameBuffer>> *buffers);
> +		void process(FrameBuffer *input, FrameBuffer *output);
> +		int start();
> +		void stop();
> +		void waitForIdle();
> +
> +	private:
> +		void debayer(uint8_t *dst, const uint8_t *src);
> +
> +		SwConverter *converter_;
> +
> +		Thread thread_;

I think you can simplify thread management by inheriting from the Thread
class instead of the Object class. See PostProcessorWorker in the
Android camera HAL implementation for instance, or
CameraManager::Private. I would then possibly also name this class
ISPWorker.
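
For illustration, a rough sketch of what that could look like (everything
below is hypothetical naming for the example, not existing libcamera API,
apart from Thread, Mutex, ConditionVariable, FrameBuffer and the TSA
macros; <queue> and <utility> would be needed for the job queue):

class ISPWorker : public Thread
{
public:
	void queueJob(FrameBuffer *input, FrameBuffer *output)
	{
		MutexLocker locker(mutex_);
		jobs_.push({ input, output });
		cv_.notify_one();
	}

	void stop()
	{
		{
			MutexLocker locker(mutex_);
			stopping_ = true;
		}
		cv_.notify_one();
		wait();		/* join the worker thread */
	}

protected:
	void run() override
	{
		while (true) {
			std::pair<FrameBuffer *, FrameBuffer *> job;

			{
				MutexLocker locker(mutex_);
				cv_.wait(locker, [&]() LIBCAMERA_TSA_REQUIRES(mutex_) {
					return stopping_ || !jobs_.empty();
				});
				if (stopping_)
					return;

				job = jobs_.front();
				jobs_.pop();
			}

			/* debayer() and the buffer-ready signals would go here */
		}
	}

private:
	Mutex mutex_;
	ConditionVariable cv_;
	std::queue<std::pair<FrameBuffer *, FrameBuffer *>> jobs_
		LIBCAMERA_TSA_GUARDED_BY(mutex_);
	bool stopping_ LIBCAMERA_TSA_GUARDED_BY(mutex_) = false;
};

SwConverter::start()/stop() could then simply call Thread::start() and the
stop() above, and the separate idle_/waitForIdle() tracking wouldn't be
needed, as stop() only returns once run() has finished the in-flight job.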

> +
> +		libcamera::Mutex idleMutex_;
> +		libcamera::ConditionVariable idleCV_;
> +		bool idle_ LIBCAMERA_TSA_GUARDED_BY(idleMutex_);
> +
> +		unsigned int width_;
> +		unsigned int height_;
> +		unsigned int stride_;
> +		Point red_shift_;

s/red_shift_/redShift_/

> +
> +		unsigned long rNumerat_, rDenomin_; /* red gain for AWB */
> +		unsigned long bNumerat_, bDenomin_; /* blue gain for AWB */
> +		unsigned long gNumerat_, gDenomin_; /* green gain for AWB */
> +	};
> +
> +	std::unique_ptr<Isp> isp_;
> +};
> +
> +} /* namespace libcamera */
> diff --git a/include/libcamera/internal/converter/meson.build b/include/libcamera/internal/converter/meson.build
> index 891e79e7..843a0483 100644
> --- a/include/libcamera/internal/converter/meson.build
> +++ b/include/libcamera/internal/converter/meson.build
> @@ -1,5 +1,6 @@
>  # SPDX-License-Identifier: CC0-1.0
>  
>  libcamera_internal_headers += files([
> +    'converter_softw.h',
>      'converter_v4l2_m2m.h',
>  ])
> diff --git a/src/libcamera/converter/converter_softw.cpp b/src/libcamera/converter/converter_softw.cpp
> new file mode 100644
> index 00000000..67245715
> --- /dev/null
> +++ b/src/libcamera/converter/converter_softw.cpp
> @@ -0,0 +1,445 @@
> +/* SPDX-License-Identifier: LGPL-2.1-or-later */
> +/*
> + * Copyright (C) 2023, Linaro Ltd
> + *
> + * converter_softw.cpp - software converter implementation (runs 100% on CPU)
> + */
> +
> +#include "libcamera/internal/converter/converter_softw.h"
> +
> +#include <sys/mman.h>
> +#include <sys/types.h>
> +#include <unistd.h>
> +
> +#include <libcamera/formats.h>
> +#include <libcamera/stream.h>
> +
> +#include "libcamera/internal/bayer_format.h"
> +#include "libcamera/internal/framebuffer.h"
> +#include "libcamera/internal/mapped_framebuffer.h"
> +
> +namespace libcamera {
> +
> +LOG_DECLARE_CATEGORY(Converter)
> +
> +SwConverter::SwConverter([[maybe_unused]] MediaDevice *media)
> +	: Converter()
> +{
> +	isp_ = std::make_unique<SwConverter::Isp>(this);
> +}
> +
> +std::vector<PixelFormat> SwConverter::formats(PixelFormat input)
> +{
> +	BayerFormat inputFormat = BayerFormat::fromPixelFormat(input);
> +
> +	/* Only RAW10P is currently supported */
> +	if (inputFormat.bitDepth != 10 ||
> +	    inputFormat.packing != BayerFormat::Packing::CSI2) {
> +		LOG(Converter, Info)
> +			<< "Unsupported input format " << input.toString();
> +		return {};
> +	}
> +
> +	return { formats::RGB888 };
> +}
> +
> +SizeRange SwConverter::sizes(const Size &input)
> +{
> +	if (input.width < 2 || input.height < 2) {
> +		LOG(Converter, Error)
> +			<< "Input format size too small: " << input.toString();
> +		return {};
> +	}
> +
> +	return SizeRange(Size(input.width - 2, input.height - 2));
> +}
> +
> +std::tuple<unsigned int, unsigned int>
> +SwConverter::strideAndFrameSize(const PixelFormat &pixelFormat,
> +				const Size &size)
> +{
> +	/* Only RGB888 output is currently supported */
> +	if (pixelFormat != formats::RGB888)
> +		return {};
> +
> +	unsigned int stride = size.width * 3;
> +	return std::make_tuple(stride, stride * (size.height));
> +}
> +
> +int SwConverter::configure(const StreamConfiguration &inputCfg,
> +			   const std::vector<std::reference_wrapper<StreamConfiguration>> &outputCfgs)
> +{
> +	if (outputCfgs.size() != 1) {
> +		LOG(Converter, Error)
> +			<< "Unsupported number of output streams: "
> +			<< outputCfgs.size();
> +		return -EINVAL;
> +	}
> +
> +	return isp_->invokeMethod(&SwConverter::Isp::configure,
> +				  ConnectionTypeBlocking, inputCfg, outputCfgs[0]);
> +}
> +
> +SwConverter::Isp::Isp(SwConverter *converter)
> +	: converter_(converter)
> +{
> +	moveToThread(&thread_);

Not needed if you inherit from Thread.

> +	thread_.start();

Is there a reason to start the thread here instead of
SwConverter::start()? Same question for stop.

If you start the thread in SwConverter::start(), you won't need to call
invokeMethod() in SwConverter::configure().
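
Roughly (sketch only, assuming the worker object owns the thread as
suggested above; Isp::start()/stop() here stand for whatever the final
thread start/stop helpers end up being called):

int SwConverter::start()
{
	/* The worker thread only needs to run while streaming. */
	return isp_->start();
}

void SwConverter::stop()
{
	/* Stops the thread and waits for any in-flight frame to finish. */
	isp_->stop();
}

int SwConverter::configure(const StreamConfiguration &inputCfg,
			   const std::vector<std::reference_wrapper<StreamConfiguration>> &outputCfgs)
{
	if (outputCfgs.size() != 1)
		return -EINVAL;

	/* No worker thread is running yet, so a direct call is enough. */
	return isp_->configure(inputCfg, outputCfgs[0]);
}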

> +}
> +
> +SwConverter::Isp::~Isp()
> +{
> +	thread_.exit();
> +	thread_.wait();
> +}
> +
> +int SwConverter::Isp::configure(const StreamConfiguration &inputCfg,
> +				const StreamConfiguration &outputCfg)
> +{
> +	BayerFormat bayerFormat =
> +		BayerFormat::fromPixelFormat(inputCfg.pixelFormat);
> +	width_ = inputCfg.size.width;
> +	height_ = inputCfg.size.height;
> +	stride_ = inputCfg.stride;
> +
> +	if (bayerFormat.bitDepth != 10 ||
> +	    bayerFormat.packing != BayerFormat::Packing::CSI2 ||
> +	    width_ < 2 || height_ < 2) {
> +		LOG(Converter, Error) << "Input format "
> +				      << inputCfg.size << "-"
> +				      << inputCfg.pixelFormat
> +				      << " not supported";
> +		return -EINVAL;
> +	}
> +
> +	switch (bayerFormat.order) {
> +	case BayerFormat::BGGR:
> +		red_shift_ = Point(0, 0);
> +		break;
> +	case BayerFormat::GBRG:
> +		red_shift_ = Point(1, 0);
> +		break;
> +	case BayerFormat::GRBG:
> +		red_shift_ = Point(0, 1);
> +		break;
> +	case BayerFormat::RGGB:
> +	default:
> +		red_shift_ = Point(1, 1);
> +		break;
> +	}
> +
> +	if (outputCfg.size.width != width_ - 2 ||
> +	    outputCfg.size.height != height_ - 2 ||
> +	    outputCfg.stride != (width_ - 2) * 3 ||
> +	    outputCfg.pixelFormat != formats::RGB888) {
> +		LOG(Converter, Error)
> +			<< "Output format not supported";
> +		return -EINVAL;
> +	}
> +
> +	LOG(Converter, Info) << "SwConverter configuration: "
> +			     << inputCfg.size << "-" << inputCfg.pixelFormat
> +			     << " -> "
> +			     << outputCfg.size << "-" << outputCfg.pixelFormat;
> +
> +	/* set r/g/b gains to 1.0 until frame data collected */
> +	rNumerat_ = rDenomin_ = 1;
> +	bNumerat_ = bDenomin_ = 1;
> +	gNumerat_ = gDenomin_ = 1;
> +
> +	return 0;
> +}
> +
> +int SwConverter::exportBuffers(unsigned int output, unsigned int count,
> +			       std::vector<std::unique_ptr<FrameBuffer>> *buffers)
> +{
> +	/* single output for now */
> +	if (output >= 1)
> +		return -EINVAL;
> +
> +	return isp_->invokeMethod(&SwConverter::Isp::exportBuffers,
> +				  ConnectionTypeBlocking, count, buffers);
> +}
> +
> +int SwConverter::Isp::exportBuffers(unsigned int count,
> +				    std::vector<std::unique_ptr<FrameBuffer>> *buffers)
> +{
> +	/* V4L2_PIX_FMT_BGR24 aka 'BGR3' for output: */
> +	unsigned int bufSize = (height_ - 2) * (width_ - 2) * 3;
> +
> +	for (unsigned int i = 0; i < count; i++) {
> +		std::string name = "frame-" + std::to_string(i);
> +
> +		const int ispFd = memfd_create(name.c_str(), 0);
> +		int ret = ftruncate(ispFd, bufSize);
> +		if (ret < 0) {
> +			LOG(Converter, Error) << "ftruncate() for memfd failed "
> +					      << strerror(-ret);
> +			return ret;
> +		}
> +
> +		FrameBuffer::Plane outPlane;
> +		outPlane.fd = SharedFD(std::move(ispFd));

FrameBuffer fds need to be dmabufs, not memfds.
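
One option for that from userspace is the dma-buf heaps API, assuming the
kernel exposes /dev/dma_heap/system (sketch only, error paths trimmed;
similar in spirit to the dma_heaps helper in the Raspberry Pi pipeline
handler):

#include <cerrno>
#include <cstddef>
#include <fcntl.h>
#include <linux/dma-heap.h>
#include <sys/ioctl.h>
#include <unistd.h>

/* Allocate one dma-buf of 'len' bytes from the system heap. */
static int allocateDmabuf(std::size_t len)
{
	int heapFd = open("/dev/dma_heap/system", O_RDWR | O_CLOEXEC);
	if (heapFd < 0)
		return -errno;

	struct dma_heap_allocation_data alloc = {};
	alloc.len = len;
	alloc.fd_flags = O_RDWR | O_CLOEXEC;

	int ret = ioctl(heapFd, DMA_HEAP_IOCTL_ALLOC, &alloc);
	int err = errno;
	close(heapFd);
	if (ret < 0)
		return -err;

	return alloc.fd;	/* suitable for FrameBuffer::Plane::fd */
}

In practice the heap would be opened once in exportBuffers() and reused for
all the buffers instead of being reopened per allocation.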

> +		outPlane.offset = 0;
> +		outPlane.length = bufSize;
> +
> +		std::vector<FrameBuffer::Plane> planes{ outPlane };
> +		buffers->emplace_back(std::make_unique<FrameBuffer>(std::move(planes)));
> +	}
> +
> +	return count;
> +}
> +
> +int SwConverter::Isp::start()
> +{
> +	return 0;
> +}
> +
> +void SwConverter::Isp::stop()
> +{
> +}
> +
> +void SwConverter::Isp::waitForIdle()
> +{
> +	MutexLocker locker(idleMutex_);
> +
> +	idleCV_.wait(locker, [&]() LIBCAMERA_TSA_REQUIRES(idleMutex_) {
> +			     return idle_;
> +		     });
> +}
> +
> +int SwConverter::start()
> +{
> +	return isp_->invokeMethod(&SwConverter::Isp::start,
> +				  ConnectionTypeBlocking);
> +}
> +
> +void SwConverter::stop()
> +{
> +	isp_->invokeMethod(&SwConverter::Isp::stop,
> +			   ConnectionTypeBlocking);
> +	isp_->invokeMethod(&SwConverter::Isp::waitForIdle,
> +			   ConnectionTypeDirect);
> +}
> +
> +int SwConverter::queueBuffers(FrameBuffer *input,
> +			      const std::map<unsigned int, FrameBuffer *> &outputs)
> +{
> +	unsigned int mask = 0;
> +
> +	/*
> +	 * Validate the outputs as a sanity check: at least one output is
> +	 * required, all outputs must reference a valid stream and no two
> +	 * outputs can reference the same stream.
> +	 */
> +	if (outputs.empty())
> +		return -EINVAL;
> +
> +	for (auto [index, buffer] : outputs) {
> +		if (!buffer)
> +			return -EINVAL;
> +		if (index >= 1) /* only single stream atm */
> +			return -EINVAL;
> +		if (mask & (1 << index))
> +			return -EINVAL;
> +
> +		mask |= 1 << index;
> +	}
> +
> +	process(input, outputs.at(0));
> +
> +	return 0;
> +}
> +
> +void SwConverter::process(FrameBuffer *input, FrameBuffer *output)
> +{
> +	isp_->invokeMethod(&SwConverter::Isp::process,
> +			   ConnectionTypeQueued, input, output);
> +}
> +
> +void SwConverter::Isp::process(FrameBuffer *input, FrameBuffer *output)
> +{
> +	{
> +		MutexLocker locker(idleMutex_);
> +		idle_ = false;
> +	}
> +
> +	/* Copy metadata from the input buffer */
> +	FrameMetadata &metadata = output->_d()->metadata();
> +	metadata.status = input->metadata().status;
> +	metadata.sequence = input->metadata().sequence;
> +	metadata.timestamp = input->metadata().timestamp;
> +
> +	MappedFrameBuffer in(input, MappedFrameBuffer::MapFlag::Read);
> +	MappedFrameBuffer out(output, MappedFrameBuffer::MapFlag::Write);
> +	if (!in.isValid() || !out.isValid()) {
> +		LOG(Converter, Error) << "mmap-ing buffer(s) failed";
> +		metadata.status = FrameMetadata::FrameError;
> +		converter_->outputBufferReady.emit(output);
> +		converter_->inputBufferReady.emit(input);
> +		return;
> +	}
> +
> +	debayer(out.planes()[0].data(), in.planes()[0].data());
> +	metadata.planes()[0].bytesused = out.planes()[0].size();
> +
> +	converter_->outputBufferReady.emit(output);
> +	converter_->inputBufferReady.emit(input);
> +
> +	{
> +		MutexLocker locker(idleMutex_);
> +		idle_ = true;
> +	}
> +	idleCV_.notify_all();
> +}
> +
> +void SwConverter::Isp::debayer(uint8_t *dst, const uint8_t *src)
> +{
> +	/* RAW10P input format is assumed */
> +
> +	/* the output buffer is in BGR24 format, (width-2) x (height-2) in size */
> +
> +	int w_out = width_ - 2;
> +	int h_out = height_ - 2;
> +
> +	unsigned long sumR = 0;
> +	unsigned long sumB = 0;
> +	unsigned long sumG = 0;
> +
> +	for (int y = 0; y < h_out; y++) {
> +		const uint8_t *pin_base = src + (y + 1) * stride_;
> +		uint8_t *pout = dst + y * w_out * 3;
> +		int phase_y = (y + red_shift_.y) % 2;
> +
> +		for (int x = 0; x < w_out; x++) {
> +			int phase_x = (x + red_shift_.x) % 2;
> +			int phase = 2 * phase_y + phase_x;
> +
> +			/* x part of the offset in the input buffer: */
> +			int x_m1 = x + x / 4;		/* offset for (x-1) */
> +			int x_0 = x + 1 + (x + 1) / 4;	/* offset for x */
> +			int x_p1 = x + 2 + (x + 2) / 4;	/* offset for (x+1) */
> +			/* the colour component value to write to the output */
> +			unsigned val;
> +
> +			switch (phase) {
> +			case 0: /* at R pixel */
> +				/* blue: ((-1,-1)+(1,-1)+(-1,1)+(1,1)) / 4 */
> +				val = ( *(pin_base + x_m1 - stride_)
> +					+ *(pin_base + x_p1 - stride_)
> +					+ *(pin_base + x_m1 + stride_)
> +					+ *(pin_base + x_p1 + stride_) ) >> 2;
> +				val = val * bNumerat_ / bDenomin_;
> +				*pout++ = (uint8_t)std::min(val, 0xffU);
> +				/* green: ((0,-1)+(-1,0)+(1,0)+(0,1)) / 4 */
> +				val = ( *(pin_base + x_0 - stride_)
> +					+ *(pin_base + x_p1)
> +					+ *(pin_base + x_m1)
> +					+ *(pin_base + x_0 + stride_) ) >> 2;
> +				val = val * gNumerat_ / gDenomin_;
> +				*pout++ = (uint8_t)std::min(val, 0xffU);
> +				/* red: (0,0) */
> +				val = *(pin_base + x_0);
> +				sumR += val;
> +				val = val * rNumerat_ / rDenomin_;
> +				*pout++ = (uint8_t)std::min(val, 0xffU);
> +				break;
> +			case 1: /* at Gr pixel */
> +				/* blue: ((0,-1) + (0,1)) / 2 */
> +				val = ( *(pin_base + x_0 - stride_)
> +					+ *(pin_base + x_0 + stride_) ) >> 1;
> +				val = val * bNumerat_ / bDenomin_;
> +				*pout++ = (uint8_t)std::min(val, 0xffU);
> +				/* green: (0,0) */
> +				val = *(pin_base + x_0);
> +				sumG += val;
> +				val = val * gNumerat_ / gDenomin_;
> +				*pout++ = (uint8_t)std::min(val, 0xffU);
> +				/* red: ((-1,0) + (1,0)) / 2 */
> +				val = ( *(pin_base + x_m1)
> +					+ *(pin_base + x_p1) ) >> 1;
> +				val = val * rNumerat_ / rDenomin_;
> +				*pout++ = (uint8_t)std::min(val, 0xffU);
> +				break;
> +			case 2: /* at Gb pixel */
> +				/* blue: ((-1,0) + (1,0)) / 2 */
> +				val = ( *(pin_base + x_m1)
> +					+ *(pin_base + x_p1) ) >> 1;
> +				val = val * bNumerat_ / bDenomin_;
> +				*pout++ = (uint8_t)std::min(val, 0xffU);
> +				/* green: (0,0) */
> +				val = *(pin_base + x_0);
> +				sumG += val;
> +				val = val * gNumerat_ / gDenomin_;
> +				*pout++ = (uint8_t)std::min(val, 0xffU);
> +				/* red: ((0,-1) + (0,1)) / 2 */
> +				val = ( *(pin_base + x_0 - stride_)
> +					+ *(pin_base + x_0 + stride_) ) >> 1;
> +				val = val * rNumerat_ / rDenomin_;
> +				*pout++ = (uint8_t)std::min(val, 0xffU);
> +				break;
> +			default: /* at B pixel */
> +				/* blue: (0,0) */
> +				val = *(pin_base + x_0);
> +				sumB += val;
> +				val = val * bNumerat_ / bDenomin_;
> +				*pout++ = (uint8_t)std::min(val, 0xffU);
> +				/* green: ((0,-1)+(-1,0)+(1,0)+(0,1)) / 4 */
> +				val = ( *(pin_base + x_0 - stride_)
> +					+ *(pin_base + x_p1)
> +					+ *(pin_base + x_m1)
> +					+ *(pin_base + x_0 + stride_) ) >> 2;
> +				val = val * gNumerat_ / gDenomin_;
> +				*pout++ = (uint8_t)std::min(val, 0xffU);
> +				/* red: ((-1,-1)+(1,-1)+(-1,1)+(1,1)) / 4 */
> +				val = ( *(pin_base + x_m1 - stride_)
> +					+ *(pin_base + x_p1 - stride_)
> +					+ *(pin_base + x_m1 + stride_)
> +					+ *(pin_base + x_p1 + stride_) ) >> 2;
> +				val = val * rNumerat_ / rDenomin_;
> +				*pout++ = (uint8_t)std::min(val, 0xffU);
> +			}
> +		}
> +	}
> +
> +	/* calculate red and blue gains for simple AWB */
> +	LOG(Converter, Debug) << "sumR = " << sumR
> +			      << ", sumB = " << sumB << ", sumG = " << sumG;
> +
> +	sumG /= 2; /* there are twice as many G pixels as R and B ones */
> +
> +	/* normalize red, blue, and green sums to fit into 22-bit value */
> +	unsigned long fRed = sumR / 0x400000;
> +	unsigned long fBlue = sumB / 0x400000;
> +	unsigned long fGreen = sumG / 0x400000;
> +	unsigned long fNorm = std::max({ 1UL, fRed, fBlue, fGreen });
> +	sumR /= fNorm;
> +	sumB /= fNorm;
> +	sumG /= fNorm;
> +
> +	LOG(Converter, Debug) << "fNorm = " << fNorm;
> +	LOG(Converter, Debug) << "Normalized: sumR = " << sumR
> +			      << ", sumB = " << sumB << ", sumG = " << sumG;
> +
> +	/* make sure red/blue gains never exceed approximately 256 */
> +	unsigned long minDenom;
> +	rNumerat_ = (sumR + sumB + sumG) / 3;
> +	minDenom = rNumerat_ / 0x100;
> +	rDenomin_ = std::max(minDenom, sumR);
> +	bNumerat_ = rNumerat_;
> +	bDenomin_ = std::max(minDenom, sumB);
> +	gNumerat_ = rNumerat_;
> +	gDenomin_ = std::max(minDenom, sumG);
> +
> +	LOG(Converter, Debug) << "rGain = [ "
> +			      << rNumerat_ << " / " << rDenomin_
> +			      << " ], bGain = [ " << bNumerat_ << " / " << bDenomin_
> +			      << " ], gGain = [ " << gNumerat_ << " / " << gDenomin_
> +			      << " (minDenom = " << minDenom << ")";
> +}
> +
> +static std::initializer_list<std::string> compatibles = {};
> +
> +REGISTER_CONVERTER("linaro-sw-converter", SwConverter, compatibles)
> +
> +} /* namespace libcamera */
> diff --git a/src/libcamera/converter/meson.build b/src/libcamera/converter/meson.build
> index 2aa72fe4..1f1e0ea4 100644
> --- a/src/libcamera/converter/meson.build
> +++ b/src/libcamera/converter/meson.build
> @@ -1,5 +1,6 @@
>  # SPDX-License-Identifier: CC0-1.0
>  
>  libcamera_sources += files([
> -        'converter_v4l2_m2m.cpp'
> +        'converter_softw.cpp',
> +        'converter_v4l2_m2m.cpp',
>  ])
Andrey Konovalov Sept. 30, 2023, 10:11 a.m. UTC | #2
Hi Laurent,

On 30.09.2023 12:59, Laurent Pinchart wrote:
> Hi Andrey,
> 
> Thank you for the patch. I haven't reviewed everything in detail as
> there are parts that I expect will change, but here are a few comments
> already.
> 
> On Thu, Sep 28, 2023 at 09:55:36PM +0300, Andrey Konovalov via libcamera-devel wrote:
>> Use the Converter interface to implement bilinear Bayer demosaic
>> filtering and very simple AWB on CPU.
>>
>> The only output format currently supported is RGB888.
>>
>> Signed-off-by: Andrey Konovalov <andrey.konovalov@linaro.org>
>> ---
>>   .../internal/converter/converter_softw.h      | 100 ++++
>>   .../libcamera/internal/converter/meson.build  |   1 +
>>   src/libcamera/converter/converter_softw.cpp   | 445 ++++++++++++++++++
>>   src/libcamera/converter/meson.build           |   3 +-
>>   4 files changed, 548 insertions(+), 1 deletion(-)
>>   create mode 100644 include/libcamera/internal/converter/converter_softw.h
>>   create mode 100644 src/libcamera/converter/converter_softw.cpp
>>
>> diff --git a/include/libcamera/internal/converter/converter_softw.h b/include/libcamera/internal/converter/converter_softw.h
>> new file mode 100644
>> index 00000000..daa2d6da
>> --- /dev/null
>> +++ b/include/libcamera/internal/converter/converter_softw.h
>> @@ -0,0 +1,100 @@
>> +/* SPDX-License-Identifier: LGPL-2.1-or-later */
>> +/*
>> + * Copyright (C) 2023, Linaro Ltd
>> + *
>> + * converter_softw.h - interface of software converter (runs 100% on CPU)
>> + */
>> +
>> +#pragma once
>> +
>> +#include <functional>
>> +#include <map>
>> +#include <memory>
>> +#include <string>
>> +#include <tuple>
>> +#include <vector>
>> +
>> +#include <libcamera/base/log.h>
>> +#include <libcamera/base/mutex.h>
>> +#include <libcamera/base/signal.h>
>> +#include <libcamera/base/thread.h>
>> +
>> +#include <libcamera/pixel_format.h>
>> +
>> +#include "libcamera/internal/converter.h"
>> +
>> +namespace libcamera {
>> +
>> +class FrameBuffer;
>> +class MediaDevice;
>> +class Size;
>> +class SizeRange;
>> +struct StreamConfiguration;
>> +
>> +class SwConverter : public Converter
>> +{
>> +public:
>> +	SwConverter(MediaDevice *media);
>> +
>> +	int loadConfiguration([[maybe_unused]] const std::string &filename) { return 0; }
>> +	bool isValid() const { return isp_ != nullptr; }
>> +
>> +	std::vector<PixelFormat> formats(PixelFormat input);
>> +	SizeRange sizes(const Size &input);
>> +
>> +	std::tuple<unsigned int, unsigned int>
>> +	strideAndFrameSize(const PixelFormat &pixelFormat, const Size &size);
>> +
>> +	int configure(const StreamConfiguration &inputCfg,
>> +		      const std::vector<std::reference_wrapper<StreamConfiguration>> &outputCfg);
>> +	int exportBuffers(unsigned int output, unsigned int count,
>> +			  std::vector<std::unique_ptr<FrameBuffer>> *buffers);
>> +
>> +	void process(FrameBuffer *input, FrameBuffer *output);
>> +	int start();
>> +	void stop();
>> +
>> +	int queueBuffers(FrameBuffer *input,
>> +			 const std::map<unsigned int, FrameBuffer *> &outputs);
>> +
>> +private:
>> +	class Isp : public Object
>> +	{
>> +	public:
>> +		Isp(SwConverter *converter);
>> +		~Isp();
>> +
>> +		int configure(const StreamConfiguration &inputCfg,
>> +			      const StreamConfiguration &outputCfg);
>> +		int exportBuffers(unsigned int count,
>> +				  std::vector<std::unique_ptr<FrameBuffer>> *buffers);
>> +		void process(FrameBuffer *input, FrameBuffer *output);
>> +		int start();
>> +		void stop();
>> +		void waitForIdle();
>> +
>> +	private:
>> +		void debayer(uint8_t *dst, const uint8_t *src);
>> +
>> +		SwConverter *converter_;
>> +
>> +		Thread thread_;
> 
> I think you can simplify thread management by inheriting from the Thread
> class instead of the Object class. See PostProcessorWorker in the
> Android camera HAL implementation for instance, or
> CameraManager::Private. I would then possibly also name this class
> ISPWorker.

Yes, the thread management part of this patch needs a cleanup.

>> +
>> +		libcamera::Mutex idleMutex_;
>> +		libcamera::ConditionVariable idleCV_;
>> +		bool idle_ LIBCAMERA_TSA_GUARDED_BY(idleMutex_);
>> +
>> +		unsigned int width_;
>> +		unsigned int height_;
>> +		unsigned int stride_;
>> +		Point red_shift_;
> 
> s/red_shift_/redShift_/

OK

>> +
>> +		unsigned long rNumerat_, rDenomin_; /* red gain for AWB */
>> +		unsigned long bNumerat_, bDenomin_; /* blue gain for AWB */
>> +		unsigned long gNumerat_, gDenomin_; /* green gain for AWB */
>> +	};
>> +
>> +	std::unique_ptr<Isp> isp_;
>> +};
>> +
>> +} /* namespace libcamera */
>> diff --git a/include/libcamera/internal/converter/meson.build b/include/libcamera/internal/converter/meson.build
>> index 891e79e7..843a0483 100644
>> --- a/include/libcamera/internal/converter/meson.build
>> +++ b/include/libcamera/internal/converter/meson.build
>> @@ -1,5 +1,6 @@
>>   # SPDX-License-Identifier: CC0-1.0
>>   
>>   libcamera_internal_headers += files([
>> +    'converter_softw.h',
>>       'converter_v4l2_m2m.h',
>>   ])
>> diff --git a/src/libcamera/converter/converter_softw.cpp b/src/libcamera/converter/converter_softw.cpp
>> new file mode 100644
>> index 00000000..67245715
>> --- /dev/null
>> +++ b/src/libcamera/converter/converter_softw.cpp
>> @@ -0,0 +1,445 @@
>> +/* SPDX-License-Identifier: LGPL-2.1-or-later */
>> +/*
>> + * Copyright (C) 2023, Linaro Ltd
>> + *
>> + * converter_softw.cpp - software converter implementation (runs 100% on CPU)
>> + */
>> +
>> +#include "libcamera/internal/converter/converter_softw.h"
>> +
>> +#include <sys/mman.h>
>> +#include <sys/types.h>
>> +#include <unistd.h>
>> +
>> +#include <libcamera/formats.h>
>> +#include <libcamera/stream.h>
>> +
>> +#include "libcamera/internal/bayer_format.h"
>> +#include "libcamera/internal/framebuffer.h"
>> +#include "libcamera/internal/mapped_framebuffer.h"
>> +
>> +namespace libcamera {
>> +
>> +LOG_DECLARE_CATEGORY(Converter)
>> +
>> +SwConverter::SwConverter([[maybe_unused]] MediaDevice *media)
>> +	: Converter()
>> +{
>> +	isp_ = std::make_unique<SwConverter::Isp>(this);
>> +}
>> +
>> +std::vector<PixelFormat> SwConverter::formats(PixelFormat input)
>> +{
>> +	BayerFormat inputFormat = BayerFormat::fromPixelFormat(input);
>> +
>> +	/* Only RAW10P is currently supported */
>> +	if (inputFormat.bitDepth != 10 ||
>> +	    inputFormat.packing != BayerFormat::Packing::CSI2) {
>> +		LOG(Converter, Info)
>> +			<< "Unsupported input format " << input.toString();
>> +		return {};
>> +	}
>> +
>> +	return { formats::RGB888 };
>> +}
>> +
>> +SizeRange SwConverter::sizes(const Size &input)
>> +{
>> +	if (input.width < 2 || input.height < 2) {
>> +		LOG(Converter, Error)
>> +			<< "Input format size too small: " << input.toString();
>> +		return {};
>> +	}
>> +
>> +	return SizeRange(Size(input.width - 2, input.height - 2));
>> +}
>> +
>> +std::tuple<unsigned int, unsigned int>
>> +SwConverter::strideAndFrameSize(const PixelFormat &pixelFormat,
>> +				const Size &size)
>> +{
>> +	/* Only RGB888 output is currently supported */
>> +	if (pixelFormat != formats::RGB888)
>> +		return {};
>> +
>> +	unsigned int stride = size.width * 3;
>> +	return std::make_tuple(stride, stride * (size.height));
>> +}
>> +
>> +int SwConverter::configure(const StreamConfiguration &inputCfg,
>> +			   const std::vector<std::reference_wrapper<StreamConfiguration>> &outputCfgs)
>> +{
>> +	if (outputCfgs.size() != 1) {
>> +		LOG(Converter, Error)
>> +			<< "Unsupported number of output streams: "
>> +			<< outputCfgs.size();
>> +		return -EINVAL;
>> +	}
>> +
>> +	return isp_->invokeMethod(&SwConverter::Isp::configure,
>> +				  ConnectionTypeBlocking, inputCfg, outputCfgs[0]);
>> +}
>> +
>> +SwConverter::Isp::Isp(SwConverter *converter)
>> +	: converter_(converter)
>> +{
>> +	moveToThread(&thread_);
> 
> Not needed if you inherit from Thread.
> 
>> +	thread_.start();
> 
> Is there a reason to start the thread here instead of
> SwConverter::start()? Same question for stop.

No good reason. I was just trying different options.
While listening to your talk at Embedded Recipes yesterday I came to the
conclusion that following the IPA module pattern - starting/stopping the
thread in SwConverter::start()/stop() - is the more natural way.

> If you start the thread in SwConverter::start(), you won't need to call
> invokeMethod() in SwConverter::configure().
> 
>> +}
>> +
>> +SwConverter::Isp::~Isp()
>> +{
>> +	thread_.exit();
>> +	thread_.wait();
>> +}
>> +
>> +int SwConverter::Isp::configure(const StreamConfiguration &inputCfg,
>> +				const StreamConfiguration &outputCfg)
>> +{
>> +	BayerFormat bayerFormat =
>> +		BayerFormat::fromPixelFormat(inputCfg.pixelFormat);
>> +	width_ = inputCfg.size.width;
>> +	height_ = inputCfg.size.height;
>> +	stride_ = inputCfg.stride;
>> +
>> +	if (bayerFormat.bitDepth != 10 ||
>> +	    bayerFormat.packing != BayerFormat::Packing::CSI2 ||
>> +	    width_ < 2 || height_ < 2) {
>> +		LOG(Converter, Error) << "Input format "
>> +				      << inputCfg.size << "-"
>> +				      << inputCfg.pixelFormat
>> +				      << " not supported";
>> +		return -EINVAL;
>> +	}
>> +
>> +	switch (bayerFormat.order) {
>> +	case BayerFormat::BGGR:
>> +		red_shift_ = Point(0, 0);
>> +		break;
>> +	case BayerFormat::GBRG:
>> +		red_shift_ = Point(1, 0);
>> +		break;
>> +	case BayerFormat::GRBG:
>> +		red_shift_ = Point(0, 1);
>> +		break;
>> +	case BayerFormat::RGGB:
>> +	default:
>> +		red_shift_ = Point(1, 1);
>> +		break;
>> +	}
>> +
>> +	if (outputCfg.size.width != width_ - 2 ||
>> +	    outputCfg.size.height != height_ - 2 ||
>> +	    outputCfg.stride != (width_ - 2) * 3 ||
>> +	    outputCfg.pixelFormat != formats::RGB888) {
>> +		LOG(Converter, Error)
>> +			<< "Output format not supported";
>> +		return -EINVAL;
>> +	}
>> +
>> +	LOG(Converter, Info) << "SwConverter configuration: "
>> +			     << inputCfg.size << "-" << inputCfg.pixelFormat
>> +			     << " -> "
>> +			     << outputCfg.size << "-" << outputCfg.pixelFormat;
>> +
>> +	/* set r/g/b gains to 1.0 until frame data collected */
>> +	rNumerat_ = rDenomin_ = 1;
>> +	bNumerat_ = bDenomin_ = 1;
>> +	gNumerat_ = gDenomin_ = 1;
>> +
>> +	return 0;
>> +}
>> +
>> +int SwConverter::exportBuffers(unsigned int output, unsigned int count,
>> +			       std::vector<std::unique_ptr<FrameBuffer>> *buffers)
>> +{
>> +	/* single output for now */
>> +	if (output >= 1)
>> +		return -EINVAL;
>> +
>> +	return isp_->invokeMethod(&SwConverter::Isp::exportBuffers,
>> +				  ConnectionTypeBlocking, count, buffers);
>> +}
>> +
>> +int SwConverter::Isp::exportBuffers(unsigned int count,
>> +				    std::vector<std::unique_ptr<FrameBuffer>> *buffers)
>> +{
>> +	/* V4L2_PIX_FMT_BGR24 aka 'BGR3' for output: */
>> +	unsigned int bufSize = (height_ - 2) * (width_ - 2) * 3;
>> +
>> +	for (unsigned int i = 0; i < count; i++) {
>> +		std::string name = "frame-" + std::to_string(i);
>> +
>> +		const int ispFd = memfd_create(name.c_str(), 0);
>> +		int ret = ftruncate(ispFd, bufSize);
>> +		if (ret < 0) {
>> +			LOG(Converter, Error) << "ftruncate() for memfd failed "
>> +					      << strerror(-ret);
>> +			return ret;
>> +		}
>> +
>> +		FrameBuffer::Plane outPlane;
>> +		outPlane.fd = SharedFD(std::move(ispFd));
> 
> FrameBuffer fds need to be dmabufs, not memfds.

Will fix that.

Thanks,
Andrey

