| Message ID | 20230928185537.20178-5-andrey.konovalov@linaro.org |
| --- | --- |
| State | Superseded |
Hi Andrey, Thank you for the patch. I haven't reviewed everything in details as there are parts that I expect will change, but here are a few comments already. On Thu, Sep 28, 2023 at 09:55:36PM +0300, Andrey Konovalov via libcamera-devel wrote: > Use the Converter interface to implement bilinear Bayer demosaic > filtering and very simple AWB on CPU. > > The only output format currently supported is RGB888. > > Signed-off-by: Andrey Konovalov <andrey.konovalov@linaro.org> > --- > .../internal/converter/converter_softw.h | 100 ++++ > .../libcamera/internal/converter/meson.build | 1 + > src/libcamera/converter/converter_softw.cpp | 445 ++++++++++++++++++ > src/libcamera/converter/meson.build | 3 +- > 4 files changed, 548 insertions(+), 1 deletion(-) > create mode 100644 include/libcamera/internal/converter/converter_softw.h > create mode 100644 src/libcamera/converter/converter_softw.cpp > > diff --git a/include/libcamera/internal/converter/converter_softw.h b/include/libcamera/internal/converter/converter_softw.h > new file mode 100644 > index 00000000..daa2d6da > --- /dev/null > +++ b/include/libcamera/internal/converter/converter_softw.h > @@ -0,0 +1,100 @@ > +/* SPDX-License-Identifier: LGPL-2.1-or-later */ > +/* > + * Copyright (C) 2023, Linaro Ltd > + * > + * converter_softw.h - interface of software converter (runs 100% on CPU) > + */ > + > +#pragma once > + > +#include <functional> > +#include <map> > +#include <memory> > +#include <string> > +#include <tuple> > +#include <vector> > + > +#include <libcamera/base/log.h> > +#include <libcamera/base/mutex.h> > +#include <libcamera/base/signal.h> > +#include <libcamera/base/thread.h> > + > +#include <libcamera/pixel_format.h> > + > +#include "libcamera/internal/converter.h" > + > +namespace libcamera { > + > +class FrameBuffer; > +class MediaDevice; > +class Size; > +class SizeRange; > +struct StreamConfiguration; > + > +class SwConverter : public Converter > +{ > +public: > + SwConverter(MediaDevice *media); > + > + int loadConfiguration([[maybe_unused]] const std::string &filename) { return 0; } > + bool isValid() const { return isp_ != nullptr; } > + > + std::vector<PixelFormat> formats(PixelFormat input); > + SizeRange sizes(const Size &input); > + > + std::tuple<unsigned int, unsigned int> > + strideAndFrameSize(const PixelFormat &pixelFormat, const Size &size); > + > + int configure(const StreamConfiguration &inputCfg, > + const std::vector<std::reference_wrapper<StreamConfiguration>> &outputCfg); > + int exportBuffers(unsigned int ouput, unsigned int count, > + std::vector<std::unique_ptr<FrameBuffer>> *buffers); > + > + void process(FrameBuffer *input, FrameBuffer *output); > + int start(); > + void stop(); > + > + int queueBuffers(FrameBuffer *input, > + const std::map<unsigned int, FrameBuffer *> &outputs); > + > +private: > + class Isp : public Object > + { > + public: > + Isp(SwConverter *converter); > + ~Isp(); > + > + int configure(const StreamConfiguration &inputCfg, > + const StreamConfiguration &outputCfg); > + int exportBuffers(unsigned int count, > + std::vector<std::unique_ptr<FrameBuffer>> *buffers); > + void process(FrameBuffer *input, FrameBuffer *output); > + int start(); > + void stop(); > + void waitForIdle(); > + > + private: > + void debayer(uint8_t *dst, const uint8_t *src); > + > + SwConverter *converter_; > + > + Thread thread_; I think you can simplify thread management by inheriting from the Thread class instead of the Object class. 
See PostProcessorWorker in the Android camera HAL implementation for instance, or CameraManager::Private. I would then possibly also name this class ISPWorker. > + > + libcamera::Mutex idleMutex_; > + libcamera::ConditionVariable idleCV_; > + bool idle_ LIBCAMERA_TSA_GUARDED_BY(idleMutex_); > + > + unsigned int width_; > + unsigned int height_; > + unsigned int stride_; > + Point red_shift_; s/red_shift_/redShift_/ > + > + unsigned long rNumerat_, rDenomin_; /* red gain for AWB */ > + unsigned long bNumerat_, bDenomin_; /* blue gain for AWB */ > + unsigned long gNumerat_, gDenomin_; /* green gain for AWB */ > + }; > + > + std::unique_ptr<Isp> isp_; > +}; > + > +} /* namespace libcamera */ > diff --git a/include/libcamera/internal/converter/meson.build b/include/libcamera/internal/converter/meson.build > index 891e79e7..843a0483 100644 > --- a/include/libcamera/internal/converter/meson.build > +++ b/include/libcamera/internal/converter/meson.build > @@ -1,5 +1,6 @@ > # SPDX-License-Identifier: CC0-1.0 > > libcamera_internal_headers += files([ > + 'converter_softw.h', > 'converter_v4l2_m2m.h', > ]) > diff --git a/src/libcamera/converter/converter_softw.cpp b/src/libcamera/converter/converter_softw.cpp > new file mode 100644 > index 00000000..67245715 > --- /dev/null > +++ b/src/libcamera/converter/converter_softw.cpp > @@ -0,0 +1,445 @@ > +/* SPDX-License-Identifier: LGPL-2.1-or-later */ > +/* > + * Copyright (C) 2023, Linaro Ltd > + * > + * converter_softw.h - interface of software converter (runs 100% on CPU) > + */ > + > +#include "libcamera/internal/converter/converter_softw.h" > + > +#include <sys/mman.h> > +#include <sys/types.h> > +#include <unistd.h> > + > +#include <libcamera/formats.h> > +#include <libcamera/stream.h> > + > +#include "libcamera/internal/bayer_format.h" > +#include "libcamera/internal/framebuffer.h" > +#include "libcamera/internal/mapped_framebuffer.h" > + > +namespace libcamera { > + > +LOG_DECLARE_CATEGORY(Converter) > + > +SwConverter::SwConverter([[maybe_unused]] MediaDevice *media) > + : Converter() > +{ > + isp_ = std::make_unique<SwConverter::Isp>(this); > +} > + > +std::vector<PixelFormat> SwConverter::formats(PixelFormat input) > +{ > + BayerFormat inputFormat = BayerFormat::fromPixelFormat(input); > + > + /* Only RAW10P is currently supported */ > + if (inputFormat.bitDepth != 10 || > + inputFormat.packing != BayerFormat::Packing::CSI2) { > + LOG(Converter, Info) > + << "Unsupported input format " << input.toString(); > + return {}; > + } > + > + return { formats::RGB888 }; > +} > + > +SizeRange SwConverter::sizes(const Size &input) > +{ > + if (input.width < 2 || input.height < 2) { > + LOG(Converter, Error) > + << "Input format size too small: " << input.toString(); > + return {}; > + } > + > + return SizeRange(Size(input.width - 2, input.height - 2)); > +} > + > +std::tuple<unsigned int, unsigned int> > +SwConverter::strideAndFrameSize(const PixelFormat &pixelFormat, > + const Size &size) > +{ > + /* Only RGB888 output is currently supported */ > + if (pixelFormat != formats::RGB888) > + return {}; > + > + unsigned int stride = size.width * 3; > + return std::make_tuple(stride, stride * (size.height)); > +} > + > +int SwConverter::configure(const StreamConfiguration &inputCfg, > + const std::vector<std::reference_wrapper<StreamConfiguration>> &outputCfgs) > +{ > + if (outputCfgs.size() != 1) { > + LOG(Converter, Error) > + << "Unsupported number of output streams: " > + << outputCfgs.size(); > + return -EINVAL; > + } > + > + return 
isp_->invokeMethod(&SwConverter::Isp::configure, > + ConnectionTypeBlocking, inputCfg, outputCfgs[0]); > +} > + > +SwConverter::Isp::Isp(SwConverter *converter) > + : converter_(converter) > +{ > + moveToThread(&thread_); Not needed if you inherit from Thread. > + thread_.start(); Is there a reason to start the thread here instead of SwConverter::start() ? Same question for stop. If you start the thread in SwConverter::start(), you won't need to call invokeMethod() in SwConverter::configure(). > +} > + > +SwConverter::Isp::~Isp() > +{ > + thread_.exit(); > + thread_.wait(); > +} > + > +int SwConverter::Isp::configure(const StreamConfiguration &inputCfg, > + const StreamConfiguration &outputCfg) > +{ > + BayerFormat bayerFormat = > + BayerFormat::fromPixelFormat(inputCfg.pixelFormat); > + width_ = inputCfg.size.width; > + height_ = inputCfg.size.height; > + stride_ = inputCfg.stride; > + > + if (bayerFormat.bitDepth != 10 || > + bayerFormat.packing != BayerFormat::Packing::CSI2 || > + width_ < 2 || height_ < 2) { > + LOG(Converter, Error) << "Input format " > + << inputCfg.size << "-" > + << inputCfg.pixelFormat > + << "not supported"; > + return -EINVAL; > + } > + > + switch (bayerFormat.order) { > + case BayerFormat::BGGR: > + red_shift_ = Point(0, 0); > + break; > + case BayerFormat::GBRG: > + red_shift_ = Point(1, 0); > + break; > + case BayerFormat::GRBG: > + red_shift_ = Point(0, 1); > + break; > + case BayerFormat::RGGB: > + default: > + red_shift_ = Point(1, 1); > + break; > + } > + > + if (outputCfg.size.width != width_ - 2 || > + outputCfg.size.height != height_ - 2 || > + outputCfg.stride != (width_ - 2) * 3 || > + outputCfg.pixelFormat != formats::RGB888) { > + LOG(Converter, Error) > + << "Output format not supported"; > + return -EINVAL; > + } > + > + LOG(Converter, Info) << "SwConverter configuration: " > + << inputCfg.size << "-" << inputCfg.pixelFormat > + << " -> " > + << outputCfg.size << "-" << outputCfg.pixelFormat; > + > + /* set r/g/b gains to 1.0 until frame data collected */ > + rNumerat_ = rDenomin_ = 1; > + bNumerat_ = bDenomin_ = 1; > + gNumerat_ = gDenomin_ = 1; > + > + return 0; > +} > + > +int SwConverter::exportBuffers(unsigned int output, unsigned int count, > + std::vector<std::unique_ptr<FrameBuffer>> *buffers) > +{ > + /* single output for now */ > + if (output >= 1) > + return -EINVAL; > + > + return isp_->invokeMethod(&SwConverter::Isp::exportBuffers, > + ConnectionTypeBlocking, count, buffers); > +} > + > +int SwConverter::Isp::exportBuffers(unsigned int count, > + std::vector<std::unique_ptr<FrameBuffer>> *buffers) > +{ > + /* V4L2_PIX_FMT_BGR24 aka 'BGR3' for output: */ > + unsigned int bufSize = (height_ - 2) * (width_ - 2) * 3; > + > + for (unsigned int i = 0; i < count; i++) { > + std::string name = "frame-" + std::to_string(i); > + > + const int ispFd = memfd_create(name.c_str(), 0); > + int ret = ftruncate(ispFd, bufSize); > + if (ret < 0) { > + LOG(Converter, Error) << "ftruncate() for memfd failed " > + << strerror(-ret); > + return ret; > + } > + > + FrameBuffer::Plane outPlane; > + outPlane.fd = SharedFD(std::move(ispFd)); FrameBuffer fds need to be dmabufs, not memfds. 
> + outPlane.offset = 0; > + outPlane.length = bufSize; > + > + std::vector<FrameBuffer::Plane> planes{ outPlane }; > + buffers->emplace_back(std::make_unique<FrameBuffer>(std::move(planes))); > + } > + > + return count; > +} > + > +int SwConverter::Isp::start() > +{ > + return 0; > +} > + > +void SwConverter::Isp::stop() > +{ > +} > + > +void SwConverter::Isp::waitForIdle() > +{ > + MutexLocker locker(idleMutex_); > + > + idleCV_.wait(locker, [&]() LIBCAMERA_TSA_REQUIRES(idleMutex_) { > + return idle_; > + }); > +} > + > +int SwConverter::start() > +{ > + return isp_->invokeMethod(&SwConverter::Isp::start, > + ConnectionTypeBlocking); > +} > + > +void SwConverter::stop() > +{ > + isp_->invokeMethod(&SwConverter::Isp::stop, > + ConnectionTypeBlocking); > + isp_->invokeMethod(&SwConverter::Isp::waitForIdle, > + ConnectionTypeDirect); > +} > + > +int SwConverter::queueBuffers(FrameBuffer *input, > + const std::map<unsigned int, FrameBuffer *> &outputs) > +{ > + unsigned int mask = 0; > + > + /* > + * Validate the outputs as a sanity check: at least one output is > + * required, all outputs must reference a valid stream and no two > + * outputs can reference the same stream. > + */ > + if (outputs.empty()) > + return -EINVAL; > + > + for (auto [index, buffer] : outputs) { > + if (!buffer) > + return -EINVAL; > + if (index >= 1) /* only single stream atm */ > + return -EINVAL; > + if (mask & (1 << index)) > + return -EINVAL; > + > + mask |= 1 << index; > + } > + > + process(input, outputs.at(0)); > + > + return 0; > +} > + > +void SwConverter::process(FrameBuffer *input, FrameBuffer *output) > +{ > + isp_->invokeMethod(&SwConverter::Isp::process, > + ConnectionTypeQueued, input, output); > +} > + > +void SwConverter::Isp::process(FrameBuffer *input, FrameBuffer *output) > +{ > + { > + MutexLocker locker(idleMutex_); > + idle_ = false; > + } > + > + /* Copy metadata from the input buffer */ > + FrameMetadata &metadata = output->_d()->metadata(); > + metadata.status = input->metadata().status; > + metadata.sequence = input->metadata().sequence; > + metadata.timestamp = input->metadata().timestamp; > + > + MappedFrameBuffer in(input, MappedFrameBuffer::MapFlag::Read); > + MappedFrameBuffer out(output, MappedFrameBuffer::MapFlag::Write); > + if (!in.isValid() || !out.isValid()) { > + LOG(Converter, Error) << "mmap-ing buffer(s) failed"; > + metadata.status = FrameMetadata::FrameError; > + converter_->outputBufferReady.emit(output); > + converter_->inputBufferReady.emit(input); > + return; > + } > + > + debayer(out.planes()[0].data(), in.planes()[0].data()); > + metadata.planes()[0].bytesused = out.planes()[0].size(); > + > + converter_->outputBufferReady.emit(output); > + converter_->inputBufferReady.emit(input); > + > + { > + MutexLocker locker(idleMutex_); > + idle_ = true; > + } > + idleCV_.notify_all(); > +} > + > +void SwConverter::Isp::debayer(uint8_t *dst, const uint8_t *src) > +{ > + /* RAW10P input format is assumed */ > + > + /* output buffer is in BGR24 format and is of (width-2)*(height-2) */ > + > + int w_out = width_ - 2; > + int h_out = height_ - 2; > + > + unsigned long sumR = 0; > + unsigned long sumB = 0; > + unsigned long sumG = 0; > + > + for (int y = 0; y < h_out; y++) { > + const uint8_t *pin_base = src + (y + 1) * stride_; > + uint8_t *pout = dst + y * w_out * 3; > + int phase_y = (y + red_shift_.y) % 2; > + > + for (int x = 0; x < w_out; x++) { > + int phase_x = (x + red_shift_.x) % 2; > + int phase = 2 * phase_y + phase_x; > + > + /* x part of the offset in the input 
buffer: */ > + int x_m1 = x + x / 4; /* offset for (x-1) */ > + int x_0 = x + 1 + (x + 1) / 4; /* offset for x */ > + int x_p1 = x + 2 + (x + 2) / 4; /* offset for (x+1) */ > + /* the colour component value to write to the output */ > + unsigned val; > + > + switch (phase) { > + case 0: /* at R pixel */ > + /* blue: ((-1,-1)+(1,-1)+(-1,1)+(1,1)) / 4 */ > + val = ( *(pin_base + x_m1 - stride_) > + + *(pin_base + x_p1 - stride_) > + + *(pin_base + x_m1 + stride_) > + + *(pin_base + x_p1 + stride_) ) >> 2; > + val = val * bNumerat_ / bDenomin_; > + *pout++ = (uint8_t)std::min(val, 0xffU); > + /* green: ((0,-1)+(-1,0)+(1,0)+(0,1)) / 4 */ > + val = ( *(pin_base + x_0 - stride_) > + + *(pin_base + x_p1) > + + *(pin_base + x_m1) > + + *(pin_base + x_0 + stride_) ) >> 2; > + val = val * gNumerat_ / gDenomin_; > + *pout++ = (uint8_t)std::min(val, 0xffU); > + /* red: (0,0) */ > + val = *(pin_base + x_0); > + sumR += val; > + val = val * rNumerat_ / rDenomin_; > + *pout++ = (uint8_t)std::min(val, 0xffU); > + break; > + case 1: /* at Gr pixel */ > + /* blue: ((0,-1) + (0,1)) / 2 */ > + val = ( *(pin_base + x_0 - stride_) > + + *(pin_base + x_0 + stride_) ) >> 1; > + val = val * bNumerat_ / bDenomin_; > + *pout++ = (uint8_t)std::min(val, 0xffU); > + /* green: (0,0) */ > + val = *(pin_base + x_0); > + sumG += val; > + val = val * gNumerat_ / gDenomin_; > + *pout++ = (uint8_t)std::min(val, 0xffU); > + /* red: ((-1,0) + (1,0)) / 2 */ > + val = ( *(pin_base + x_m1) > + + *(pin_base + x_p1) ) >> 1; > + val = val * rNumerat_ / rDenomin_; > + *pout++ = (uint8_t)std::min(val, 0xffU); > + break; > + case 2: /* at Gb pixel */ > + /* blue: ((-1,0) + (1,0)) / 2 */ > + val = ( *(pin_base + x_m1) > + + *(pin_base + x_p1) ) >> 1; > + val = val * bNumerat_ / bDenomin_; > + *pout++ = (uint8_t)std::min(val, 0xffU); > + /* green: (0,0) */ > + val = *(pin_base + x_0); > + sumG += val; > + val = val * gNumerat_ / gDenomin_; > + *pout++ = (uint8_t)std::min(val, 0xffU); > + /* red: ((0,-1) + (0,1)) / 2 */ > + val = ( *(pin_base + x_0 - stride_) > + + *(pin_base + x_0 + stride_) ) >> 1; > + val = val * rNumerat_ / rDenomin_; > + *pout++ = (uint8_t)std::min(val, 0xffU); > + break; > + default: /* at B pixel */ > + /* blue: (0,0) */ > + val = *(pin_base + x_0); > + sumB += val; > + val = val * bNumerat_ / bDenomin_; > + *pout++ = (uint8_t)std::min(val, 0xffU); > + /* green: ((0,-1)+(-1,0)+(1,0)+(0,1)) / 4 */ > + val = ( *(pin_base + x_0 - stride_) > + + *(pin_base + x_p1) > + + *(pin_base + x_m1) > + + *(pin_base + x_0 + stride_) ) >> 2; > + val = val * gNumerat_ / gDenomin_; > + *pout++ = (uint8_t)std::min(val, 0xffU); > + /* red: ((-1,-1)+(1,-1)+(-1,1)+(1,1)) / 4 */ > + val = ( *(pin_base + x_m1 - stride_) > + + *(pin_base + x_p1 - stride_) > + + *(pin_base + x_m1 + stride_) > + + *(pin_base + x_p1 + stride_) ) >> 2; > + val = val * rNumerat_ / rDenomin_; > + *pout++ = (uint8_t)std::min(val, 0xffU); > + } > + } > + } > + > + /* calculate red and blue gains for simple AWB */ > + LOG(Converter, Debug) << "sumR = " << sumR > + << ", sumB = " << sumB << ", sumG = " << sumG; > + > + sumG /= 2; /* the number of G pixels is twice as big vs R and B ones */ > + > + /* normalize red, blue, and green sums to fit into 22-bit value */ > + unsigned long fRed = sumR / 0x400000; > + unsigned long fBlue = sumB / 0x400000; > + unsigned long fGreen = sumG / 0x400000; > + unsigned long fNorm = std::max({ 1UL, fRed, fBlue, fGreen }); > + sumR /= fNorm; > + sumB /= fNorm; > + sumG /= fNorm; > + > + LOG(Converter, Debug) << "fNorm = " << fNorm; > + 
LOG(Converter, Debug) << "Normalized: sumR = " << sumR > + << ", sumB= " << sumB << ", sumG = " << sumG; > + > + /* make sure red/blue gains never exceed approximately 256 */ > + unsigned long minDenom; > + rNumerat_ = (sumR + sumB + sumG) / 3; > + minDenom = rNumerat_ / 0x100; > + rDenomin_ = std::max(minDenom, sumR); > + bNumerat_ = rNumerat_; > + bDenomin_ = std::max(minDenom, sumB); > + gNumerat_ = rNumerat_; > + gDenomin_ = std::max(minDenom, sumG); > + > + LOG(Converter, Debug) << "rGain = [ " > + << rNumerat_ << " / " << rDenomin_ > + << " ], bGain = [ " << bNumerat_ << " / " << bDenomin_ > + << " ], gGain = [ " << gNumerat_ << " / " << gDenomin_ > + << " (minDenom = " << minDenom << ")"; > +} > + > +static std::initializer_list<std::string> compatibles = {}; > + > +REGISTER_CONVERTER("linaro-sw-converter", SwConverter, compatibles) > + > +} /* namespace libcamera */ > diff --git a/src/libcamera/converter/meson.build b/src/libcamera/converter/meson.build > index 2aa72fe4..1f1e0ea4 100644 > --- a/src/libcamera/converter/meson.build > +++ b/src/libcamera/converter/meson.build > @@ -1,5 +1,6 @@ > # SPDX-License-Identifier: CC0-1.0 > > libcamera_sources += files([ > - 'converter_v4l2_m2m.cpp' > + 'converter_softw.cpp', > + 'converter_v4l2_m2m.cpp', > ])
Hi Laurent, On 30.09.2023 12:59, Laurent Pinchart wrote: > Hi Andrey, > > Thank you for the patch. I haven't reviewed everything in details as > there are parts that I expect will change, but here are a few comments > already. > > On Thu, Sep 28, 2023 at 09:55:36PM +0300, Andrey Konovalov via libcamera-devel wrote: >> Use the Converter interface to implement bilinear Bayer demosaic >> filtering and very simple AWB on CPU. >> >> The only output format currently supported is RGB888. >> >> Signed-off-by: Andrey Konovalov <andrey.konovalov@linaro.org> >> --- >> .../internal/converter/converter_softw.h | 100 ++++ >> .../libcamera/internal/converter/meson.build | 1 + >> src/libcamera/converter/converter_softw.cpp | 445 ++++++++++++++++++ >> src/libcamera/converter/meson.build | 3 +- >> 4 files changed, 548 insertions(+), 1 deletion(-) >> create mode 100644 include/libcamera/internal/converter/converter_softw.h >> create mode 100644 src/libcamera/converter/converter_softw.cpp >> >> diff --git a/include/libcamera/internal/converter/converter_softw.h b/include/libcamera/internal/converter/converter_softw.h >> new file mode 100644 >> index 00000000..daa2d6da >> --- /dev/null >> +++ b/include/libcamera/internal/converter/converter_softw.h >> @@ -0,0 +1,100 @@ >> +/* SPDX-License-Identifier: LGPL-2.1-or-later */ >> +/* >> + * Copyright (C) 2023, Linaro Ltd >> + * >> + * converter_softw.h - interface of software converter (runs 100% on CPU) >> + */ >> + >> +#pragma once >> + >> +#include <functional> >> +#include <map> >> +#include <memory> >> +#include <string> >> +#include <tuple> >> +#include <vector> >> + >> +#include <libcamera/base/log.h> >> +#include <libcamera/base/mutex.h> >> +#include <libcamera/base/signal.h> >> +#include <libcamera/base/thread.h> >> + >> +#include <libcamera/pixel_format.h> >> + >> +#include "libcamera/internal/converter.h" >> + >> +namespace libcamera { >> + >> +class FrameBuffer; >> +class MediaDevice; >> +class Size; >> +class SizeRange; >> +struct StreamConfiguration; >> + >> +class SwConverter : public Converter >> +{ >> +public: >> + SwConverter(MediaDevice *media); >> + >> + int loadConfiguration([[maybe_unused]] const std::string &filename) { return 0; } >> + bool isValid() const { return isp_ != nullptr; } >> + >> + std::vector<PixelFormat> formats(PixelFormat input); >> + SizeRange sizes(const Size &input); >> + >> + std::tuple<unsigned int, unsigned int> >> + strideAndFrameSize(const PixelFormat &pixelFormat, const Size &size); >> + >> + int configure(const StreamConfiguration &inputCfg, >> + const std::vector<std::reference_wrapper<StreamConfiguration>> &outputCfg); >> + int exportBuffers(unsigned int ouput, unsigned int count, >> + std::vector<std::unique_ptr<FrameBuffer>> *buffers); >> + >> + void process(FrameBuffer *input, FrameBuffer *output); >> + int start(); >> + void stop(); >> + >> + int queueBuffers(FrameBuffer *input, >> + const std::map<unsigned int, FrameBuffer *> &outputs); >> + >> +private: >> + class Isp : public Object >> + { >> + public: >> + Isp(SwConverter *converter); >> + ~Isp(); >> + >> + int configure(const StreamConfiguration &inputCfg, >> + const StreamConfiguration &outputCfg); >> + int exportBuffers(unsigned int count, >> + std::vector<std::unique_ptr<FrameBuffer>> *buffers); >> + void process(FrameBuffer *input, FrameBuffer *output); >> + int start(); >> + void stop(); >> + void waitForIdle(); >> + >> + private: >> + void debayer(uint8_t *dst, const uint8_t *src); >> + >> + SwConverter *converter_; >> + >> + Thread thread_; > > I 
think you can simplify thread management by inheriting from the Thread > class instead of the Object class. See PostProcessorWorker in the > Android camera HAL implementation for instance, or > CameraManager::Private. I would then possibly also name this class > ISPWorker. Yes, the thread management part of this patch needs a clean up. >> + >> + libcamera::Mutex idleMutex_; >> + libcamera::ConditionVariable idleCV_; >> + bool idle_ LIBCAMERA_TSA_GUARDED_BY(idleMutex_); >> + >> + unsigned int width_; >> + unsigned int height_; >> + unsigned int stride_; >> + Point red_shift_; > > s/red_shift_/redShift_/ OK >> + >> + unsigned long rNumerat_, rDenomin_; /* red gain for AWB */ >> + unsigned long bNumerat_, bDenomin_; /* blue gain for AWB */ >> + unsigned long gNumerat_, gDenomin_; /* green gain for AWB */ >> + }; >> + >> + std::unique_ptr<Isp> isp_; >> +}; >> + >> +} /* namespace libcamera */ >> diff --git a/include/libcamera/internal/converter/meson.build b/include/libcamera/internal/converter/meson.build >> index 891e79e7..843a0483 100644 >> --- a/include/libcamera/internal/converter/meson.build >> +++ b/include/libcamera/internal/converter/meson.build >> @@ -1,5 +1,6 @@ >> # SPDX-License-Identifier: CC0-1.0 >> >> libcamera_internal_headers += files([ >> + 'converter_softw.h', >> 'converter_v4l2_m2m.h', >> ]) >> diff --git a/src/libcamera/converter/converter_softw.cpp b/src/libcamera/converter/converter_softw.cpp >> new file mode 100644 >> index 00000000..67245715 >> --- /dev/null >> +++ b/src/libcamera/converter/converter_softw.cpp >> @@ -0,0 +1,445 @@ >> +/* SPDX-License-Identifier: LGPL-2.1-or-later */ >> +/* >> + * Copyright (C) 2023, Linaro Ltd >> + * >> + * converter_softw.h - interface of software converter (runs 100% on CPU) >> + */ >> + >> +#include "libcamera/internal/converter/converter_softw.h" >> + >> +#include <sys/mman.h> >> +#include <sys/types.h> >> +#include <unistd.h> >> + >> +#include <libcamera/formats.h> >> +#include <libcamera/stream.h> >> + >> +#include "libcamera/internal/bayer_format.h" >> +#include "libcamera/internal/framebuffer.h" >> +#include "libcamera/internal/mapped_framebuffer.h" >> + >> +namespace libcamera { >> + >> +LOG_DECLARE_CATEGORY(Converter) >> + >> +SwConverter::SwConverter([[maybe_unused]] MediaDevice *media) >> + : Converter() >> +{ >> + isp_ = std::make_unique<SwConverter::Isp>(this); >> +} >> + >> +std::vector<PixelFormat> SwConverter::formats(PixelFormat input) >> +{ >> + BayerFormat inputFormat = BayerFormat::fromPixelFormat(input); >> + >> + /* Only RAW10P is currently supported */ >> + if (inputFormat.bitDepth != 10 || >> + inputFormat.packing != BayerFormat::Packing::CSI2) { >> + LOG(Converter, Info) >> + << "Unsupported input format " << input.toString(); >> + return {}; >> + } >> + >> + return { formats::RGB888 }; >> +} >> + >> +SizeRange SwConverter::sizes(const Size &input) >> +{ >> + if (input.width < 2 || input.height < 2) { >> + LOG(Converter, Error) >> + << "Input format size too small: " << input.toString(); >> + return {}; >> + } >> + >> + return SizeRange(Size(input.width - 2, input.height - 2)); >> +} >> + >> +std::tuple<unsigned int, unsigned int> >> +SwConverter::strideAndFrameSize(const PixelFormat &pixelFormat, >> + const Size &size) >> +{ >> + /* Only RGB888 output is currently supported */ >> + if (pixelFormat != formats::RGB888) >> + return {}; >> + >> + unsigned int stride = size.width * 3; >> + return std::make_tuple(stride, stride * (size.height)); >> +} >> + >> +int SwConverter::configure(const StreamConfiguration 
&inputCfg, >> + const std::vector<std::reference_wrapper<StreamConfiguration>> &outputCfgs) >> +{ >> + if (outputCfgs.size() != 1) { >> + LOG(Converter, Error) >> + << "Unsupported number of output streams: " >> + << outputCfgs.size(); >> + return -EINVAL; >> + } >> + >> + return isp_->invokeMethod(&SwConverter::Isp::configure, >> + ConnectionTypeBlocking, inputCfg, outputCfgs[0]); >> +} >> + >> +SwConverter::Isp::Isp(SwConverter *converter) >> + : converter_(converter) >> +{ >> + moveToThread(&thread_); > > Not needed if you inherit from Thread. > >> + thread_.start(); > > Is there a reason to start the thread here instead of > SwConverter::start() ? Same question for stop. No good reason. I jwas just trying different options. While listening to your talk at Embedded Recipes yesterday I've got to that following the IPA module pattern - start/stop the thread in SwConverter::start()/stop() - is the more natural way. > If you start the thread in SwConverter::start(), you won't need to call > invokeMethod() in SwConverter::configure(). > >> +} >> + >> +SwConverter::Isp::~Isp() >> +{ >> + thread_.exit(); >> + thread_.wait(); >> +} >> + >> +int SwConverter::Isp::configure(const StreamConfiguration &inputCfg, >> + const StreamConfiguration &outputCfg) >> +{ >> + BayerFormat bayerFormat = >> + BayerFormat::fromPixelFormat(inputCfg.pixelFormat); >> + width_ = inputCfg.size.width; >> + height_ = inputCfg.size.height; >> + stride_ = inputCfg.stride; >> + >> + if (bayerFormat.bitDepth != 10 || >> + bayerFormat.packing != BayerFormat::Packing::CSI2 || >> + width_ < 2 || height_ < 2) { >> + LOG(Converter, Error) << "Input format " >> + << inputCfg.size << "-" >> + << inputCfg.pixelFormat >> + << "not supported"; >> + return -EINVAL; >> + } >> + >> + switch (bayerFormat.order) { >> + case BayerFormat::BGGR: >> + red_shift_ = Point(0, 0); >> + break; >> + case BayerFormat::GBRG: >> + red_shift_ = Point(1, 0); >> + break; >> + case BayerFormat::GRBG: >> + red_shift_ = Point(0, 1); >> + break; >> + case BayerFormat::RGGB: >> + default: >> + red_shift_ = Point(1, 1); >> + break; >> + } >> + >> + if (outputCfg.size.width != width_ - 2 || >> + outputCfg.size.height != height_ - 2 || >> + outputCfg.stride != (width_ - 2) * 3 || >> + outputCfg.pixelFormat != formats::RGB888) { >> + LOG(Converter, Error) >> + << "Output format not supported"; >> + return -EINVAL; >> + } >> + >> + LOG(Converter, Info) << "SwConverter configuration: " >> + << inputCfg.size << "-" << inputCfg.pixelFormat >> + << " -> " >> + << outputCfg.size << "-" << outputCfg.pixelFormat; >> + >> + /* set r/g/b gains to 1.0 until frame data collected */ >> + rNumerat_ = rDenomin_ = 1; >> + bNumerat_ = bDenomin_ = 1; >> + gNumerat_ = gDenomin_ = 1; >> + >> + return 0; >> +} >> + >> +int SwConverter::exportBuffers(unsigned int output, unsigned int count, >> + std::vector<std::unique_ptr<FrameBuffer>> *buffers) >> +{ >> + /* single output for now */ >> + if (output >= 1) >> + return -EINVAL; >> + >> + return isp_->invokeMethod(&SwConverter::Isp::exportBuffers, >> + ConnectionTypeBlocking, count, buffers); >> +} >> + >> +int SwConverter::Isp::exportBuffers(unsigned int count, >> + std::vector<std::unique_ptr<FrameBuffer>> *buffers) >> +{ >> + /* V4L2_PIX_FMT_BGR24 aka 'BGR3' for output: */ >> + unsigned int bufSize = (height_ - 2) * (width_ - 2) * 3; >> + >> + for (unsigned int i = 0; i < count; i++) { >> + std::string name = "frame-" + std::to_string(i); >> + >> + const int ispFd = memfd_create(name.c_str(), 0); >> + int ret = ftruncate(ispFd, 
bufSize); >> + if (ret < 0) { >> + LOG(Converter, Error) << "ftruncate() for memfd failed " >> + << strerror(-ret); >> + return ret; >> + } >> + >> + FrameBuffer::Plane outPlane; >> + outPlane.fd = SharedFD(std::move(ispFd)); > > FrameBuffer fds need to be dmabufs, not memfds. Will fix that. Thanks, Andrey >> + outPlane.offset = 0; >> + outPlane.length = bufSize; >> + >> + std::vector<FrameBuffer::Plane> planes{ outPlane }; >> + buffers->emplace_back(std::make_unique<FrameBuffer>(std::move(planes))); >> + } >> + >> + return count; >> +} >> + >> +int SwConverter::Isp::start() >> +{ >> + return 0; >> +} >> + >> +void SwConverter::Isp::stop() >> +{ >> +} >> + >> +void SwConverter::Isp::waitForIdle() >> +{ >> + MutexLocker locker(idleMutex_); >> + >> + idleCV_.wait(locker, [&]() LIBCAMERA_TSA_REQUIRES(idleMutex_) { >> + return idle_; >> + }); >> +} >> + >> +int SwConverter::start() >> +{ >> + return isp_->invokeMethod(&SwConverter::Isp::start, >> + ConnectionTypeBlocking); >> +} >> + >> +void SwConverter::stop() >> +{ >> + isp_->invokeMethod(&SwConverter::Isp::stop, >> + ConnectionTypeBlocking); >> + isp_->invokeMethod(&SwConverter::Isp::waitForIdle, >> + ConnectionTypeDirect); >> +} >> + >> +int SwConverter::queueBuffers(FrameBuffer *input, >> + const std::map<unsigned int, FrameBuffer *> &outputs) >> +{ >> + unsigned int mask = 0; >> + >> + /* >> + * Validate the outputs as a sanity check: at least one output is >> + * required, all outputs must reference a valid stream and no two >> + * outputs can reference the same stream. >> + */ >> + if (outputs.empty()) >> + return -EINVAL; >> + >> + for (auto [index, buffer] : outputs) { >> + if (!buffer) >> + return -EINVAL; >> + if (index >= 1) /* only single stream atm */ >> + return -EINVAL; >> + if (mask & (1 << index)) >> + return -EINVAL; >> + >> + mask |= 1 << index; >> + } >> + >> + process(input, outputs.at(0)); >> + >> + return 0; >> +} >> + >> +void SwConverter::process(FrameBuffer *input, FrameBuffer *output) >> +{ >> + isp_->invokeMethod(&SwConverter::Isp::process, >> + ConnectionTypeQueued, input, output); >> +} >> + >> +void SwConverter::Isp::process(FrameBuffer *input, FrameBuffer *output) >> +{ >> + { >> + MutexLocker locker(idleMutex_); >> + idle_ = false; >> + } >> + >> + /* Copy metadata from the input buffer */ >> + FrameMetadata &metadata = output->_d()->metadata(); >> + metadata.status = input->metadata().status; >> + metadata.sequence = input->metadata().sequence; >> + metadata.timestamp = input->metadata().timestamp; >> + >> + MappedFrameBuffer in(input, MappedFrameBuffer::MapFlag::Read); >> + MappedFrameBuffer out(output, MappedFrameBuffer::MapFlag::Write); >> + if (!in.isValid() || !out.isValid()) { >> + LOG(Converter, Error) << "mmap-ing buffer(s) failed"; >> + metadata.status = FrameMetadata::FrameError; >> + converter_->outputBufferReady.emit(output); >> + converter_->inputBufferReady.emit(input); >> + return; >> + } >> + >> + debayer(out.planes()[0].data(), in.planes()[0].data()); >> + metadata.planes()[0].bytesused = out.planes()[0].size(); >> + >> + converter_->outputBufferReady.emit(output); >> + converter_->inputBufferReady.emit(input); >> + >> + { >> + MutexLocker locker(idleMutex_); >> + idle_ = true; >> + } >> + idleCV_.notify_all(); >> +} >> + >> +void SwConverter::Isp::debayer(uint8_t *dst, const uint8_t *src) >> +{ >> + /* RAW10P input format is assumed */ >> + >> + /* output buffer is in BGR24 format and is of (width-2)*(height-2) */ >> + >> + int w_out = width_ - 2; >> + int h_out = height_ - 2; >> + >> + 
unsigned long sumR = 0; >> + unsigned long sumB = 0; >> + unsigned long sumG = 0; >> + >> + for (int y = 0; y < h_out; y++) { >> + const uint8_t *pin_base = src + (y + 1) * stride_; >> + uint8_t *pout = dst + y * w_out * 3; >> + int phase_y = (y + red_shift_.y) % 2; >> + >> + for (int x = 0; x < w_out; x++) { >> + int phase_x = (x + red_shift_.x) % 2; >> + int phase = 2 * phase_y + phase_x; >> + >> + /* x part of the offset in the input buffer: */ >> + int x_m1 = x + x / 4; /* offset for (x-1) */ >> + int x_0 = x + 1 + (x + 1) / 4; /* offset for x */ >> + int x_p1 = x + 2 + (x + 2) / 4; /* offset for (x+1) */ >> + /* the colour component value to write to the output */ >> + unsigned val; >> + >> + switch (phase) { >> + case 0: /* at R pixel */ >> + /* blue: ((-1,-1)+(1,-1)+(-1,1)+(1,1)) / 4 */ >> + val = ( *(pin_base + x_m1 - stride_) >> + + *(pin_base + x_p1 - stride_) >> + + *(pin_base + x_m1 + stride_) >> + + *(pin_base + x_p1 + stride_) ) >> 2; >> + val = val * bNumerat_ / bDenomin_; >> + *pout++ = (uint8_t)std::min(val, 0xffU); >> + /* green: ((0,-1)+(-1,0)+(1,0)+(0,1)) / 4 */ >> + val = ( *(pin_base + x_0 - stride_) >> + + *(pin_base + x_p1) >> + + *(pin_base + x_m1) >> + + *(pin_base + x_0 + stride_) ) >> 2; >> + val = val * gNumerat_ / gDenomin_; >> + *pout++ = (uint8_t)std::min(val, 0xffU); >> + /* red: (0,0) */ >> + val = *(pin_base + x_0); >> + sumR += val; >> + val = val * rNumerat_ / rDenomin_; >> + *pout++ = (uint8_t)std::min(val, 0xffU); >> + break; >> + case 1: /* at Gr pixel */ >> + /* blue: ((0,-1) + (0,1)) / 2 */ >> + val = ( *(pin_base + x_0 - stride_) >> + + *(pin_base + x_0 + stride_) ) >> 1; >> + val = val * bNumerat_ / bDenomin_; >> + *pout++ = (uint8_t)std::min(val, 0xffU); >> + /* green: (0,0) */ >> + val = *(pin_base + x_0); >> + sumG += val; >> + val = val * gNumerat_ / gDenomin_; >> + *pout++ = (uint8_t)std::min(val, 0xffU); >> + /* red: ((-1,0) + (1,0)) / 2 */ >> + val = ( *(pin_base + x_m1) >> + + *(pin_base + x_p1) ) >> 1; >> + val = val * rNumerat_ / rDenomin_; >> + *pout++ = (uint8_t)std::min(val, 0xffU); >> + break; >> + case 2: /* at Gb pixel */ >> + /* blue: ((-1,0) + (1,0)) / 2 */ >> + val = ( *(pin_base + x_m1) >> + + *(pin_base + x_p1) ) >> 1; >> + val = val * bNumerat_ / bDenomin_; >> + *pout++ = (uint8_t)std::min(val, 0xffU); >> + /* green: (0,0) */ >> + val = *(pin_base + x_0); >> + sumG += val; >> + val = val * gNumerat_ / gDenomin_; >> + *pout++ = (uint8_t)std::min(val, 0xffU); >> + /* red: ((0,-1) + (0,1)) / 2 */ >> + val = ( *(pin_base + x_0 - stride_) >> + + *(pin_base + x_0 + stride_) ) >> 1; >> + val = val * rNumerat_ / rDenomin_; >> + *pout++ = (uint8_t)std::min(val, 0xffU); >> + break; >> + default: /* at B pixel */ >> + /* blue: (0,0) */ >> + val = *(pin_base + x_0); >> + sumB += val; >> + val = val * bNumerat_ / bDenomin_; >> + *pout++ = (uint8_t)std::min(val, 0xffU); >> + /* green: ((0,-1)+(-1,0)+(1,0)+(0,1)) / 4 */ >> + val = ( *(pin_base + x_0 - stride_) >> + + *(pin_base + x_p1) >> + + *(pin_base + x_m1) >> + + *(pin_base + x_0 + stride_) ) >> 2; >> + val = val * gNumerat_ / gDenomin_; >> + *pout++ = (uint8_t)std::min(val, 0xffU); >> + /* red: ((-1,-1)+(1,-1)+(-1,1)+(1,1)) / 4 */ >> + val = ( *(pin_base + x_m1 - stride_) >> + + *(pin_base + x_p1 - stride_) >> + + *(pin_base + x_m1 + stride_) >> + + *(pin_base + x_p1 + stride_) ) >> 2; >> + val = val * rNumerat_ / rDenomin_; >> + *pout++ = (uint8_t)std::min(val, 0xffU); >> + } >> + } >> + } >> + >> + /* calculate red and blue gains for simple AWB */ >> + LOG(Converter, Debug) << 
"sumR = " << sumR >> + << ", sumB = " << sumB << ", sumG = " << sumG; >> + >> + sumG /= 2; /* the number of G pixels is twice as big vs R and B ones */ >> + >> + /* normalize red, blue, and green sums to fit into 22-bit value */ >> + unsigned long fRed = sumR / 0x400000; >> + unsigned long fBlue = sumB / 0x400000; >> + unsigned long fGreen = sumG / 0x400000; >> + unsigned long fNorm = std::max({ 1UL, fRed, fBlue, fGreen }); >> + sumR /= fNorm; >> + sumB /= fNorm; >> + sumG /= fNorm; >> + >> + LOG(Converter, Debug) << "fNorm = " << fNorm; >> + LOG(Converter, Debug) << "Normalized: sumR = " << sumR >> + << ", sumB= " << sumB << ", sumG = " << sumG; >> + >> + /* make sure red/blue gains never exceed approximately 256 */ >> + unsigned long minDenom; >> + rNumerat_ = (sumR + sumB + sumG) / 3; >> + minDenom = rNumerat_ / 0x100; >> + rDenomin_ = std::max(minDenom, sumR); >> + bNumerat_ = rNumerat_; >> + bDenomin_ = std::max(minDenom, sumB); >> + gNumerat_ = rNumerat_; >> + gDenomin_ = std::max(minDenom, sumG); >> + >> + LOG(Converter, Debug) << "rGain = [ " >> + << rNumerat_ << " / " << rDenomin_ >> + << " ], bGain = [ " << bNumerat_ << " / " << bDenomin_ >> + << " ], gGain = [ " << gNumerat_ << " / " << gDenomin_ >> + << " (minDenom = " << minDenom << ")"; >> +} >> + >> +static std::initializer_list<std::string> compatibles = {}; >> + >> +REGISTER_CONVERTER("linaro-sw-converter", SwConverter, compatibles) >> + >> +} /* namespace libcamera */ >> diff --git a/src/libcamera/converter/meson.build b/src/libcamera/converter/meson.build >> index 2aa72fe4..1f1e0ea4 100644 >> --- a/src/libcamera/converter/meson.build >> +++ b/src/libcamera/converter/meson.build >> @@ -1,5 +1,6 @@ >> # SPDX-License-Identifier: CC0-1.0 >> >> libcamera_sources += files([ >> - 'converter_v4l2_m2m.cpp' >> + 'converter_softw.cpp', >> + 'converter_v4l2_m2m.cpp', >> ]) >