Message ID | 20240113142218.28063-11-hdegoede@redhat.com |
---|---|
State | Superseded |
Headers | show |
Series |
|
Related | show |
Hi! > Add CPU based debayering implementation. This initial implementation > only supports debayering packed 10 bits per pixel bayer data in > the 4 standard bayer orders. > > Doxygen documentation by Dennis Bonke. > +++ b/include/libcamera/internal/software_isp/debayer_cpu.h > +/** > + * \class DebayerCpu > + * \brief Class for debayering on the CPU > + * > + * Implementation for CPU based debayering > + */ > +class DebayerCpu : public Debayer, public Object > +{ > +public: > + /* > + * FIXME this should be a plain (implementation independent) SwStats > + * this can be fixed once getStats() is dropped. > + */ Is the FIXME still applicable? > + /* Skip 30 frames for things to stabilize then measure 30 frames */ > + static const int framesToSkip = 30; > + static const int framesToMeasure = 60; > +}; Should the stats be somehow optional? I guess setting framesToMeasure to -1 does that. > +// RGR > +// GBG > +// RGR > +#define BGGR_BGR888(p, n, div) \ > + *dst++ = blue_[curr[x] / (div)]; \ > + *dst++ = green_[(prev[x] + curr[x - p] + curr[x + n] + next[x]) / (4 * (div))]; \ > + *dst++ = red_[(prev[x - p] + prev[x + n] + next[x - p] + next[x + n]) / (4 * (div))]; \ > + x++; I'm not exactly happy about these macros. I'd really like inline functions; but I guess this can be revisited post-merge, perhaps at the same time as extending outputformats. > +/* TODO: this ignores outputFormat since there is only 1 supported outputFormat for now */ Best regards, Pavel
Hans de Goede <hdegoede@redhat.com> writes: > Add CPU based debayering implementation. This initial implementation > only supports debayering packed 10 bits per pixel bayer data in > the 4 standard bayer orders. > > Doxygen documentation by Dennis Bonke. > > Co-authored-by: Dennis Bonke <admin@dennisbonke.com> > Signed-off-by: Dennis Bonke <admin@dennisbonke.com> > Co-authored-by: Andrey Konovalov <andrey.konovalov@linaro.org> > Signed-off-by: Andrey Konovalov <andrey.konovalov@linaro.org> > Co-authored-by: Pavel Machek <pavel@ucw.cz> > Signed-off-by: Pavel Machek <pavel@ucw.cz> > Signed-off-by: Hans de Goede <hdegoede@redhat.com> > Tested-by: Bryan O'Donoghue <bryan.odonoghue@linaro.org> # sc8280xp Lenovo x13s > Tested-by: Pavel Machek <pavel@ucw.cz> > --- > .../internal/software_isp/debayer_cpu.h | 131 +++++ > .../internal/software_isp/meson.build | 1 + > src/libcamera/software_isp/debayer_cpu.cpp | 528 ++++++++++++++++++ > src/libcamera/software_isp/meson.build | 1 + > 4 files changed, 661 insertions(+) > create mode 100644 include/libcamera/internal/software_isp/debayer_cpu.h > create mode 100644 src/libcamera/software_isp/debayer_cpu.cpp > > diff --git a/include/libcamera/internal/software_isp/debayer_cpu.h b/include/libcamera/internal/software_isp/debayer_cpu.h > new file mode 100644 > index 00000000..78573f44 > --- /dev/null > +++ b/include/libcamera/internal/software_isp/debayer_cpu.h > @@ -0,0 +1,131 @@ > +/* SPDX-License-Identifier: LGPL-2.1-or-later */ > +/* > + * Copyright (C) 2023, Linaro Ltd > + * Copyright (C) 2023, Red Hat Inc. > + * > + * Authors: > + * Hans de Goede <hdegoede@redhat.com> > + * > + * debayer_cpu.h - CPU based debayering header > + */ > + > +#pragma once > + > +#include <memory> > +#include <stdint.h> > +#include <vector> > + > +#include <libcamera/base/object.h> > + > +#include "libcamera/internal/software_isp/swstats_cpu.h" > +#include "libcamera/internal/software_isp/debayer.h" > + > +namespace libcamera { > + > +/** > + * \class DebayerCpu > + * \brief Class for debayering on the CPU > + * > + * Implementation for CPU based debayering > + */ > +class DebayerCpu : public Debayer, public Object > +{ > +public: > + /* > + * FIXME this should be a plain (implementation independent) SwStats > + * this can be fixed once getStats() is dropped. > + */ > + /** > + * \brief Constructs a DebayerCpu object. > + * \param[in] stats Pointer to the stats object to use. > + */ > + DebayerCpu(std::unique_ptr<SwStatsCpu> stats); > + ~DebayerCpu(); > + > + /* > + * Setup the Debayer object according to the passed in parameters. > + * Return 0 on success, a negative errno value on failure > + * (unsupported parameters). > + */ > + int configure(const StreamConfiguration &inputCfg, > + const std::vector<std::reference_wrapper<StreamConfiguration>> &outputCfgs); > + > + /* > + * Get width and height at which the bayer-pattern repeats. > + * Return pattern-size or an empty Size for an unsupported inputFormat. > + */ > + Size patternSize(PixelFormat inputFormat); > + > + std::vector<PixelFormat> formats(PixelFormat input); > + std::tuple<unsigned int, unsigned int> > + strideAndFrameSize(const PixelFormat &outputFormat, const Size &size); > + > + void process(FrameBuffer *input, FrameBuffer *output, DebayerParams params); > + > + /** > + * \brief Get the file descriptor for the statistics. > + * > + * \return the file descriptor pointing to the statistics. > + */ > + const SharedFD &getStatsFD() { return stats_->getStatsFD(); } > + > + /** > + * \brief Get the output frame size. > + * > + * \return The output frame size. > + */ > + unsigned int frameSize() { return outputConfig_.frameSize; } > +private: > + void initLinePointers(const uint8_t *linePointers[], const uint8_t *src); > + void shiftLinePointers(const uint8_t *linePointers[], const uint8_t *src); > + void process2(const uint8_t *src, uint8_t *dst); > + void process4(const uint8_t *src, uint8_t *dst); > + /* CSI-2 packed 10-bit raw bayer format (all the 4 orders) */ > + void debayer10P_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]); > + void debayer10P_GRGR_BGR888(uint8_t *dst, const uint8_t *src[]); > + void debayer10P_GBGB_BGR888(uint8_t *dst, const uint8_t *src[]); > + void debayer10P_RGRG_BGR888(uint8_t *dst, const uint8_t *src[]); > + > + typedef void (DebayerCpu::*debayerFn)(uint8_t *dst, const uint8_t *src[]); > + > + struct DebayerInputConfig { > + Size patternSize; > + unsigned int bpp; /* Memory used per pixel, not precision */ > + unsigned int stride; > + std::vector<PixelFormat> outputFormats; > + }; > + > + struct DebayerOutputConfig { > + unsigned int bpp; /* Memory used per pixel, not precision */ > + unsigned int stride; > + unsigned int frameSize; > + }; > + > + int getInputConfig(PixelFormat inputFormat, DebayerInputConfig &config); > + int getOutputConfig(PixelFormat outputFormat, DebayerOutputConfig &config); > + int setDebayerFunctions(PixelFormat inputFormat, PixelFormat outputFormat); > + > + uint8_t gamma_[1024]; > + uint8_t red_[256]; > + uint8_t green_[256]; > + uint8_t blue_[256]; Named constants for the array sizes? > + debayerFn debayer0_; > + debayerFn debayer1_; > + debayerFn debayer2_; > + debayerFn debayer3_; > + Rectangle window_; > + DebayerInputConfig inputConfig_; > + DebayerOutputConfig outputConfig_; > + std::unique_ptr<SwStatsCpu> stats_; > + uint8_t *lineBuffers_[5]; A named constant for the array size? Also, the array size assumes a limit on the pattern height, which is reasonable but not documented. > + unsigned int lineBufferIndex_; > + bool enableInputMemcpy_; > + float gamma_correction_; > + int measuredFrames_; > + int64_t frameProcessTime_; > + /* Skip 30 frames for things to stabilize then measure 30 frames */ > + static const int framesToSkip = 30; > + static const int framesToMeasure = 60; Frames-to-measure is 30, a name like lastFrameToMeasure might be a bit clearer. > +}; > + > +} /* namespace libcamera */ > diff --git a/include/libcamera/internal/software_isp/meson.build b/include/libcamera/internal/software_isp/meson.build > index 7e40925e..b5a0d737 100644 > --- a/include/libcamera/internal/software_isp/meson.build > +++ b/include/libcamera/internal/software_isp/meson.build > @@ -2,6 +2,7 @@ > > libcamera_internal_headers += files([ > 'debayer.h', > + 'debayer_cpu.h', > 'debayer_params.h', > 'swisp_stats.h', > 'swstats.h', > diff --git a/src/libcamera/software_isp/debayer_cpu.cpp b/src/libcamera/software_isp/debayer_cpu.cpp > new file mode 100644 > index 00000000..e0c3c658 > --- /dev/null > +++ b/src/libcamera/software_isp/debayer_cpu.cpp > @@ -0,0 +1,528 @@ > +/* SPDX-License-Identifier: LGPL-2.1-or-later */ > +/* > + * Copyright (C) 2023, Linaro Ltd > + * Copyright (C) 2023, Red Hat Inc. > + * > + * Authors: > + * Hans de Goede <hdegoede@redhat.com> > + * > + * debayer_cpu.cpp - CPU based debayering class > + */ > + > +#include "libcamera/internal/software_isp/debayer_cpu.h" > + > +#include <math.h> > +#include <stdlib.h> > +#include <time.h> > + > +#include <libcamera/formats.h> > + > +#include "libcamera/internal/bayer_format.h" > +#include "libcamera/internal/framebuffer.h" > +#include "libcamera/internal/mapped_framebuffer.h" > + > +namespace libcamera { > + > +DebayerCpu::DebayerCpu(std::unique_ptr<SwStatsCpu> stats) > + : stats_(std::move(stats)), gamma_correction_(1.0) > +{ > +#ifdef __x86_64__ > + enableInputMemcpy_ = false; > +#else > + enableInputMemcpy_ = true; > +#endif > + /* Initialize gamma to 1.0 curve */ > + for (int i = 0; i < 1024; i++) > + gamma_[i] = i / 4; > + > + for (int i = 0; i < 5; i++) > + lineBuffers_[i] = NULL; A question for C/C++ experts whether NULL or nullptr should be used here, with a special consideration of free() calls below. > +} > + > +DebayerCpu::~DebayerCpu() > +{ > + for (int i = 0; i < 5; i++) > + free(lineBuffers_[i]); > +} > + > +// RGR > +// GBG > +// RGR > +#define BGGR_BGR888(p, n, div) \ > + *dst++ = blue_[curr[x] / (div)]; \ > + *dst++ = green_[(prev[x] + curr[x - p] + curr[x + n] + next[x]) / (4 * (div))]; \ > + *dst++ = red_[(prev[x - p] + prev[x + n] + next[x - p] + next[x + n]) / (4 * (div))]; \ > + x++; > + > +// GBG > +// RGR > +// GBG > +#define GRBG_BGR888(p, n, div) \ > + *dst++ = blue_[(prev[x] + next[x]) / (2 * (div))]; \ > + *dst++ = green_[curr[x] / (div)]; \ > + *dst++ = red_[(curr[x - p] + curr[x + n]) / (2 * (div))]; \ > + x++; > + > +// GRG > +// BGB > +// GRG > +#define GBRG_BGR888(p, n, div) \ > + *dst++ = blue_[(curr[x - p] + curr[x + n]) / (2 * (div))]; \ > + *dst++ = green_[curr[x] / (div)]; \ > + *dst++ = red_[(prev[x] + next[x]) / (2 * (div))]; \ > + x++; > + > +// BGB > +// GRG > +// BGB > +#define RGGB_BGR888(p, n, div) \ > + *dst++ = blue_[(prev[x - p] + prev[x + n] + next[x - p] + next[x + n]) / (4 * (div))]; \ > + *dst++ = green_[(prev[x] + curr[x - p] + curr[x + n] + next[x]) / (4 * (div))]; \ > + *dst++ = red_[curr[x] / (div)]; \ > + x++; > + > +void DebayerCpu::debayer10P_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]) > +{ > + const int width_in_bytes = window_.width * 5 / 4; > + const uint8_t *prev = (const uint8_t *)src[0]; > + const uint8_t *curr = (const uint8_t *)src[1]; > + const uint8_t *next = (const uint8_t *)src[2]; > + > + /* > + * For the first pixel getting a pixel from the previous column uses > + * x - 2 to skip the 5th byte with least-significant bits for 4 pixels. > + * Same for last pixel (uses x + 2) and looking at the next column. > + * x++ in the for-loop skips the 5th byte with 4 x 2 lsb-s for 10bit packed. > + */ > + for (int x = 0; x < width_in_bytes; x++) { > + /* Even pixel */ > + BGGR_BGR888(2, 1, 1) > + /* Odd pixel BGGR -> GBRG */ > + GBRG_BGR888(1, 1, 1) > + /* Same thing for next 2 pixels */ > + BGGR_BGR888(1, 1, 1) > + GBRG_BGR888(1, 2, 1) "Even" and "odd" is a bit confusing here. How about "first", "second", "third", "fourth"? > + } > +} > + > +void DebayerCpu::debayer10P_GRGR_BGR888(uint8_t *dst, const uint8_t *src[]) > +{ > + const int width_in_bytes = window_.width * 5 / 4; > + const uint8_t *prev = (const uint8_t *)src[0]; > + const uint8_t *curr = (const uint8_t *)src[1]; > + const uint8_t *next = (const uint8_t *)src[2]; > + > + for (int x = 0; x < width_in_bytes; x++) { > + /* Even pixel */ > + GRBG_BGR888(2, 1, 1) > + /* Odd pixel GRBG -> RGGB */ > + RGGB_BGR888(1, 1, 1) > + /* Same thing for next 2 pixels */ > + GRBG_BGR888(1, 1, 1) > + RGGB_BGR888(1, 2, 1) > + } > +} > + > +void DebayerCpu::debayer10P_GBGB_BGR888(uint8_t *dst, const uint8_t *src[]) > +{ > + const int width_in_bytes = window_.width * 5 / 4; > + const uint8_t *prev = (const uint8_t *)src[0]; > + const uint8_t *curr = (const uint8_t *)src[1]; > + const uint8_t *next = (const uint8_t *)src[2]; > + > + for (int x = 0; x < width_in_bytes; x++) { > + /* Even pixel */ > + GBRG_BGR888(2, 1, 1) > + /* Odd pixel GBGR -> BGGR */ > + BGGR_BGR888(1, 1, 1) > + /* Same thing for next 2 pixels */ > + GBRG_BGR888(1, 1, 1) > + BGGR_BGR888(1, 2, 1) > + } > +} > + > +void DebayerCpu::debayer10P_RGRG_BGR888(uint8_t *dst, const uint8_t *src[]) > +{ > + const int width_in_bytes = window_.width * 5 / 4; > + const uint8_t *prev = (const uint8_t *)src[0]; > + const uint8_t *curr = (const uint8_t *)src[1]; > + const uint8_t *next = (const uint8_t *)src[2]; > + > + for (int x = 0; x < width_in_bytes; x++) { > + /* Even pixel */ > + RGGB_BGR888(2, 1, 1) > + /* Odd pixel RGGB -> GRBG*/ > + GRBG_BGR888(1, 1, 1) > + /* Same thing for next 2 pixels */ > + RGGB_BGR888(1, 1, 1) > + GRBG_BGR888(1, 2, 1) > + } > +} > + > +static bool isStandardBayerOrder(BayerFormat::Order order) > +{ > + return order == BayerFormat::BGGR || order == BayerFormat::GBRG || > + order == BayerFormat::GRBG || order == BayerFormat::RGGB; > +} > + > +int DebayerCpu::getInputConfig(PixelFormat inputFormat, DebayerInputConfig &config) > +{ > + BayerFormat bayerFormat = > + BayerFormat::fromPixelFormat(inputFormat); > + > + if (bayerFormat.bitDepth == 10 && > + bayerFormat.packing == BayerFormat::Packing::CSI2 && > + isStandardBayerOrder(bayerFormat.order)) { > + config.bpp = 10; > + config.patternSize.width = 4; /* 5 bytes per *4* pixels */ > + config.patternSize.height = 2; > + config.outputFormats = std::vector<PixelFormat>({ formats::RGB888 }); > + return 0; > + } > + > + LOG(Debayer, Info) > + << "Unsupported input format " << inputFormat.toString(); > + return -EINVAL; > +} > + > +int DebayerCpu::getOutputConfig(PixelFormat outputFormat, DebayerOutputConfig &config) > +{ > + if (outputFormat == formats::RGB888) { > + config.bpp = 24; > + return 0; > + } > + > + LOG(Debayer, Info) > + << "Unsupported output format " << outputFormat.toString(); > + return -EINVAL; > +} > + > +/* TODO: this ignores outputFormat since there is only 1 supported outputFormat for now */ > +int DebayerCpu::setDebayerFunctions(PixelFormat inputFormat, [[maybe_unused]] PixelFormat outputFormat) > +{ > + BayerFormat bayerFormat = > + BayerFormat::fromPixelFormat(inputFormat); > + > + if (bayerFormat.bitDepth == 10 && > + bayerFormat.packing == BayerFormat::Packing::CSI2) { > + switch (bayerFormat.order) { > + case BayerFormat::BGGR: > + debayer0_ = &DebayerCpu::debayer10P_BGBG_BGR888; > + debayer1_ = &DebayerCpu::debayer10P_GRGR_BGR888; > + return 0; > + case BayerFormat::GBRG: > + debayer0_ = &DebayerCpu::debayer10P_GBGB_BGR888; > + debayer1_ = &DebayerCpu::debayer10P_RGRG_BGR888; > + return 0; > + case BayerFormat::GRBG: > + debayer0_ = &DebayerCpu::debayer10P_GRGR_BGR888; > + debayer1_ = &DebayerCpu::debayer10P_BGBG_BGR888; > + return 0; > + case BayerFormat::RGGB: > + debayer0_ = &DebayerCpu::debayer10P_RGRG_BGR888; > + debayer1_ = &DebayerCpu::debayer10P_GBGB_BGR888; > + return 0; > + default: > + break; > + } > + } > + > + LOG(Debayer, Error) << "Unsupported input output format combination"; > + return -EINVAL; > +} > + > +int DebayerCpu::configure(const StreamConfiguration &inputCfg, > + const std::vector<std::reference_wrapper<StreamConfiguration>> &outputCfgs) > +{ > + if (getInputConfig(inputCfg.pixelFormat, inputConfig_) != 0) > + return -EINVAL; > + > + if (stats_->configure(inputCfg) != 0) > + return -EINVAL; > + > + const Size &stats_pattern_size = stats_->patternSize(); > + if (inputConfig_.patternSize.width != stats_pattern_size.width || > + inputConfig_.patternSize.height != stats_pattern_size.height) { > + LOG(Debayer, Error) > + << "mismatching stats and debayer pattern sizes for " > + << inputCfg.pixelFormat.toString(); > + return -EINVAL; > + } > + > + inputConfig_.stride = inputCfg.stride; > + > + if (outputCfgs.size() != 1) { > + LOG(Debayer, Error) > + << "Unsupported number of output streams: " > + << outputCfgs.size(); > + return -EINVAL; > + } > + > + const StreamConfiguration &outputCfg = outputCfgs[0]; > + SizeRange outSizeRange = sizes(inputCfg.pixelFormat, inputCfg.size); > + std::tie(outputConfig_.stride, outputConfig_.frameSize) = > + strideAndFrameSize(outputCfg.pixelFormat, outputCfg.size); > + > + if (!outSizeRange.contains(outputCfg.size) || outputConfig_.stride != outputCfg.stride) { > + LOG(Debayer, Error) > + << "Invalid output size/stride: " > + << "\n " << outputCfg.size << " (" << outSizeRange << ")" > + << "\n " << outputCfg.stride << " (" << outputConfig_.stride << ")"; > + return -EINVAL; > + } > + > + if (setDebayerFunctions(inputCfg.pixelFormat, outputCfg.pixelFormat) != 0) > + return -EINVAL; > + > + window_.x = ((inputCfg.size.width - outputCfg.size.width) / 2) & > + ~(inputConfig_.patternSize.width - 1); > + window_.y = ((inputCfg.size.height - outputCfg.size.height) / 2) & > + ~(inputConfig_.patternSize.height - 1); *Implicit* assumption that width and height are powers of 2? > + window_.width = outputCfg.size.width; > + window_.height = outputCfg.size.height; > + > + /* Don't pass x,y since process() already adjusts src before passing it */ > + stats_->setWindow(Rectangle(window_.size())); > + > + for (unsigned int i = 0; > + i < (inputConfig_.patternSize.height + 1) && enableInputMemcpy_; > + i++) { > + /* pad with patternSize.Width on both left and right side */ > + size_t lineLength = (window_.width + 2 * inputConfig_.patternSize.width) * > + inputConfig_.bpp / 8; > + > + free(lineBuffers_[i]); > + lineBuffers_[i] = (uint8_t *)malloc(lineLength); > + if (!lineBuffers_[i]) > + return -ENOMEM; What happens if malloc above fails and then free in the DebayerCpu destructor is called? > + } > + > + measuredFrames_ = 0; > + frameProcessTime_ = 0; > + > + return 0; > +} > + > +Size DebayerCpu::patternSize(PixelFormat inputFormat) > +{ > + DebayerCpu::DebayerInputConfig config; > + > + if (getInputConfig(inputFormat, config) != 0) > + return {}; > + > + return config.patternSize; > +} > + > +std::vector<PixelFormat> DebayerCpu::formats(PixelFormat inputFormat) > +{ > + DebayerCpu::DebayerInputConfig config; > + > + if (getInputConfig(inputFormat, config) != 0) > + return std::vector<PixelFormat>(); > + > + return config.outputFormats; > +} > + > +std::tuple<unsigned int, unsigned int> > +DebayerCpu::strideAndFrameSize(const PixelFormat &outputFormat, const Size &size) > +{ > + DebayerCpu::DebayerOutputConfig config; > + > + if (getOutputConfig(outputFormat, config) != 0) > + return std::make_tuple(0, 0); > + > + /* round up to multiple of 8 for 64 bits alignment */ > + unsigned int stride = (size.width * config.bpp / 8 + 7) & ~7; > + > + return std::make_tuple(stride, stride * size.height); > +} > + > +void DebayerCpu::initLinePointers(const uint8_t *linePointers[], const uint8_t *src) > +{ > + const int patternHeight = inputConfig_.patternSize.height; > + > + for (int i = 0; i < patternHeight; i++) > + linePointers[i + 1] = src + > + (-patternHeight / 2 + i) * (int)inputConfig_.stride; > + > + if (!enableInputMemcpy_) > + return; > + > + for (int i = 0; i < patternHeight; i++) { > + /* pad with patternSize.Width on both left and right side */ > + size_t lineLength = (window_.width + 2 * inputConfig_.patternSize.width) * > + inputConfig_.bpp / 8; > + int padding = inputConfig_.patternSize.width * inputConfig_.bpp / 8; The code/pattern duplication in lineLength and padding computation above and below calls for inline helper functions. > + > + memcpy(lineBuffers_[i], linePointers[i + 1] - padding, lineLength); > + linePointers[i + 1] = lineBuffers_[i] + padding; > + } > + > + /* Point lineBufferIndex_ to first unused lineBuffer */ > + lineBufferIndex_ = patternHeight; Tracking all the memory locations here and checking their correctness is difficult. Not sure how to make it easier to understand, maybe more local variables or comments? > +} > + > +void DebayerCpu::shiftLinePointers(const uint8_t *linePointers[], const uint8_t *src) > +{ > + const int patternHeight = inputConfig_.patternSize.height; > + > + for (int i = 0; i < patternHeight; i++) > + linePointers[i] = linePointers[i + 1]; > + > + linePointers[patternHeight] = src + > + (patternHeight / 2) * (int)inputConfig_.stride; > + > + if (!enableInputMemcpy_) > + return; > + > + size_t lineLength = (window_.width + 2 * inputConfig_.patternSize.width) * > + inputConfig_.bpp / 8; > + int padding = inputConfig_.patternSize.width * inputConfig_.bpp / 8; > + memcpy(lineBuffers_[lineBufferIndex_], linePointers[patternHeight] - padding, lineLength); > + linePointers[patternHeight] = lineBuffers_[lineBufferIndex_] + padding; > + > + lineBufferIndex_ = (lineBufferIndex_ + 1) % (patternHeight + 1); > +} > + > +void DebayerCpu::process2(const uint8_t *src, uint8_t *dst) > +{ > + const unsigned int y_end = window_.y + window_.height; > + const uint8_t *linePointers[3]; > + > + /* Adjust src to top left corner of the window */ > + src += window_.y * inputConfig_.stride + window_.x * inputConfig_.bpp / 8; > + > + initLinePointers(linePointers, src); > + > + for (unsigned int y = window_.y; y < y_end; y += 2) { > + shiftLinePointers(linePointers, src); > + stats_->processLine0(y, linePointers); > + (this->*debayer0_)(dst, linePointers); > + src += inputConfig_.stride; > + dst += outputConfig_.stride; > + > + shiftLinePointers(linePointers, src); > + (this->*debayer1_)(dst, linePointers); > + src += inputConfig_.stride; > + dst += outputConfig_.stride; > + } > +} > + > +void DebayerCpu::process4(const uint8_t *src, uint8_t *dst) > +{ > + const unsigned int y_end = window_.y + window_.height; > + const uint8_t *linePointers[5]; > + > + /* Adjust src to top left corner of the window */ > + src += window_.y * inputConfig_.stride + window_.x * inputConfig_.bpp / 8; > + > + initLinePointers(linePointers, src); > + > + for (unsigned int y = window_.y; y < y_end; y += 4) { > + shiftLinePointers(linePointers, src); > + stats_->processLine0(y, linePointers); > + (this->*debayer0_)(dst, linePointers); > + src += inputConfig_.stride; > + dst += outputConfig_.stride; > + > + shiftLinePointers(linePointers, src); > + (this->*debayer1_)(dst, linePointers); > + src += inputConfig_.stride; > + dst += outputConfig_.stride; > + > + shiftLinePointers(linePointers, src); > + stats_->processLine2(y, linePointers); > + (this->*debayer2_)(dst, linePointers); > + src += inputConfig_.stride; > + dst += outputConfig_.stride; > + > + shiftLinePointers(linePointers, src); > + (this->*debayer3_)(dst, linePointers); > + src += inputConfig_.stride; > + dst += outputConfig_.stride; > + } > +} > + > +static inline int64_t timeDiff(timespec &after, timespec &before) > +{ > + return (after.tv_sec - before.tv_sec) * 1000000000LL + > + (int64_t)after.tv_nsec - (int64_t)before.tv_nsec; > +} > + > +void DebayerCpu::process(FrameBuffer *input, FrameBuffer *output, DebayerParams params) > +{ > + timespec frameStartTime; > + > + if (measuredFrames_ < DebayerCpu::framesToMeasure) { > + frameStartTime = {}; > + clock_gettime(CLOCK_MONOTONIC_RAW, &frameStartTime); > + } > + > + /* Apply DebayerParams */ > + if (params.gamma != gamma_correction_) { > + for (int i = 0; i < 1024; i++) > + gamma_[i] = 255 * powf(i / 1023.0, params.gamma); > + > + gamma_correction_ = params.gamma; > + } > + > + for (int i = 0; i < 256; i++) { > + int idx; > + > + /* Apply gamma after gain! */ > + idx = std::min({ i * params.gainR / 64U, 1023U }); > + red_[i] = gamma_[idx]; > + > + idx = std::min({ i * params.gainG / 64U, 1023U }); > + green_[i] = gamma_[idx]; > + > + idx = std::min({ i * params.gainB / 64U, 1023U }); > + blue_[i] = gamma_[idx]; > + } > + > + /* Copy metadata from the input buffer */ > + FrameMetadata &metadata = output->_d()->metadata(); > + metadata.status = input->metadata().status; > + metadata.sequence = input->metadata().sequence; > + metadata.timestamp = input->metadata().timestamp; > + > + MappedFrameBuffer in(input, MappedFrameBuffer::MapFlag::Read); > + MappedFrameBuffer out(output, MappedFrameBuffer::MapFlag::Write); > + if (!in.isValid() || !out.isValid()) { > + LOG(Debayer, Error) << "mmap-ing buffer(s) failed"; > + metadata.status = FrameMetadata::FrameError; > + return; > + } > + > + stats_->startFrame(); > + > + if (inputConfig_.patternSize.height == 2) > + process2(in.planes()[0].data(), out.planes()[0].data()); > + else > + process4(in.planes()[0].data(), out.planes()[0].data()); > + > + metadata.planes()[0].bytesused = out.planes()[0].size(); > + > + /* Measure before emitting signals */ > + if (measuredFrames_ < DebayerCpu::framesToMeasure && > + ++measuredFrames_ > DebayerCpu::framesToSkip) { > + timespec frameEndTime = {}; > + clock_gettime(CLOCK_MONOTONIC_RAW, &frameEndTime); > + frameProcessTime_ += timeDiff(frameEndTime, frameStartTime); > + if (measuredFrames_ == DebayerCpu::framesToMeasure) { > + const int measuredFrames = DebayerCpu::framesToMeasure - > + DebayerCpu::framesToSkip; > + LOG(Debayer, Info) > + << "Processed " << measuredFrames > + << " frames in " << frameProcessTime_ / 1000 << "us, " > + << frameProcessTime_ / (1000 * measuredFrames) > + << " us/frame"; > + } > + } > + > + stats_->finishFrame(); > + outputBufferReady.emit(output); > + inputBufferReady.emit(input); The thread-bound calls problem here, Andrei already works on it. > +} > + > +} /* namespace libcamera */ > diff --git a/src/libcamera/software_isp/meson.build b/src/libcamera/software_isp/meson.build > index d4ae5ac7..6d7a44d7 100644 > --- a/src/libcamera/software_isp/meson.build > +++ b/src/libcamera/software_isp/meson.build > @@ -2,6 +2,7 @@ > > libcamera_sources += files([ > 'debayer.cpp', > + 'debayer_cpu.cpp', > 'swstats.cpp', > 'swstats_cpu.cpp', > ])
On Tue, Jan 23, 2024 at 10:23:27PM +0100, Milan Zamazal wrote: > Hans de Goede <hdegoede@redhat.com> writes: > > > Add CPU based debayering implementation. This initial implementation > > only supports debayering packed 10 bits per pixel bayer data in > > the 4 standard bayer orders. > > > > Doxygen documentation by Dennis Bonke. > > > > Co-authored-by: Dennis Bonke <admin@dennisbonke.com> > > Signed-off-by: Dennis Bonke <admin@dennisbonke.com> > > Co-authored-by: Andrey Konovalov <andrey.konovalov@linaro.org> > > Signed-off-by: Andrey Konovalov <andrey.konovalov@linaro.org> > > Co-authored-by: Pavel Machek <pavel@ucw.cz> > > Signed-off-by: Pavel Machek <pavel@ucw.cz> > > Signed-off-by: Hans de Goede <hdegoede@redhat.com> > > Tested-by: Bryan O'Donoghue <bryan.odonoghue@linaro.org> # sc8280xp Lenovo x13s > > Tested-by: Pavel Machek <pavel@ucw.cz> > > --- > > .../internal/software_isp/debayer_cpu.h | 131 +++++ > > .../internal/software_isp/meson.build | 1 + > > src/libcamera/software_isp/debayer_cpu.cpp | 528 ++++++++++++++++++ > > src/libcamera/software_isp/meson.build | 1 + > > 4 files changed, 661 insertions(+) > > create mode 100644 include/libcamera/internal/software_isp/debayer_cpu.h > > create mode 100644 src/libcamera/software_isp/debayer_cpu.cpp > > > > diff --git a/include/libcamera/internal/software_isp/debayer_cpu.h b/include/libcamera/internal/software_isp/debayer_cpu.h > > new file mode 100644 > > index 00000000..78573f44 > > --- /dev/null > > +++ b/include/libcamera/internal/software_isp/debayer_cpu.h > > @@ -0,0 +1,131 @@ > > +/* SPDX-License-Identifier: LGPL-2.1-or-later */ > > +/* > > + * Copyright (C) 2023, Linaro Ltd > > + * Copyright (C) 2023, Red Hat Inc. > > + * > > + * Authors: > > + * Hans de Goede <hdegoede@redhat.com> > > + * > > + * debayer_cpu.h - CPU based debayering header > > + */ > > + > > +#pragma once > > + > > +#include <memory> > > +#include <stdint.h> > > +#include <vector> > > + > > +#include <libcamera/base/object.h> > > + > > +#include "libcamera/internal/software_isp/swstats_cpu.h" > > +#include "libcamera/internal/software_isp/debayer.h" > > + > > +namespace libcamera { > > + > > +/** > > + * \class DebayerCpu > > + * \brief Class for debayering on the CPU > > + * > > + * Implementation for CPU based debayering > > + */ > > +class DebayerCpu : public Debayer, public Object > > +{ > > +public: > > + /* > > + * FIXME this should be a plain (implementation independent) SwStats > > + * this can be fixed once getStats() is dropped. > > + */ > > + /** > > + * \brief Constructs a DebayerCpu object. > > + * \param[in] stats Pointer to the stats object to use. > > + */ > > + DebayerCpu(std::unique_ptr<SwStatsCpu> stats); > > + ~DebayerCpu(); > > + > > + /* > > + * Setup the Debayer object according to the passed in parameters. > > + * Return 0 on success, a negative errno value on failure > > + * (unsupported parameters). > > + */ > > + int configure(const StreamConfiguration &inputCfg, > > + const std::vector<std::reference_wrapper<StreamConfiguration>> &outputCfgs); > > + > > + /* > > + * Get width and height at which the bayer-pattern repeats. > > + * Return pattern-size or an empty Size for an unsupported inputFormat. > > + */ > > + Size patternSize(PixelFormat inputFormat); > > + > > + std::vector<PixelFormat> formats(PixelFormat input); > > + std::tuple<unsigned int, unsigned int> > > + strideAndFrameSize(const PixelFormat &outputFormat, const Size &size); > > + > > + void process(FrameBuffer *input, FrameBuffer *output, DebayerParams params); > > + > > + /** > > + * \brief Get the file descriptor for the statistics. > > + * > > + * \return the file descriptor pointing to the statistics. > > + */ > > + const SharedFD &getStatsFD() { return stats_->getStatsFD(); } > > + > > + /** > > + * \brief Get the output frame size. > > + * > > + * \return The output frame size. > > + */ > > + unsigned int frameSize() { return outputConfig_.frameSize; } > > +private: > > + void initLinePointers(const uint8_t *linePointers[], const uint8_t *src); > > + void shiftLinePointers(const uint8_t *linePointers[], const uint8_t *src); > > + void process2(const uint8_t *src, uint8_t *dst); > > + void process4(const uint8_t *src, uint8_t *dst); > > + /* CSI-2 packed 10-bit raw bayer format (all the 4 orders) */ > > + void debayer10P_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]); > > + void debayer10P_GRGR_BGR888(uint8_t *dst, const uint8_t *src[]); > > + void debayer10P_GBGB_BGR888(uint8_t *dst, const uint8_t *src[]); > > + void debayer10P_RGRG_BGR888(uint8_t *dst, const uint8_t *src[]); > > + > > + typedef void (DebayerCpu::*debayerFn)(uint8_t *dst, const uint8_t *src[]); > > + > > + struct DebayerInputConfig { > > + Size patternSize; > > + unsigned int bpp; /* Memory used per pixel, not precision */ > > + unsigned int stride; > > + std::vector<PixelFormat> outputFormats; > > + }; > > + > > + struct DebayerOutputConfig { > > + unsigned int bpp; /* Memory used per pixel, not precision */ > > + unsigned int stride; > > + unsigned int frameSize; > > + }; > > + > > + int getInputConfig(PixelFormat inputFormat, DebayerInputConfig &config); > > + int getOutputConfig(PixelFormat outputFormat, DebayerOutputConfig &config); > > + int setDebayerFunctions(PixelFormat inputFormat, PixelFormat outputFormat); > > + > > + uint8_t gamma_[1024]; > > + uint8_t red_[256]; > > + uint8_t green_[256]; > > + uint8_t blue_[256]; > > Named constants for the array sizes? And use std::array<> instead of fixed-size C arrays. The span class is useful too when passing references to arrays to functions. > > + debayerFn debayer0_; > > + debayerFn debayer1_; > > + debayerFn debayer2_; > > + debayerFn debayer3_; > > + Rectangle window_; > > + DebayerInputConfig inputConfig_; > > + DebayerOutputConfig outputConfig_; > > + std::unique_ptr<SwStatsCpu> stats_; > > + uint8_t *lineBuffers_[5]; > > A named constant for the array size? > > Also, the array size assumes a limit on the pattern height, which is reasonable > but not documented. > > > + unsigned int lineBufferIndex_; > > + bool enableInputMemcpy_; > > + float gamma_correction_; > > + int measuredFrames_; > > + int64_t frameProcessTime_; > > + /* Skip 30 frames for things to stabilize then measure 30 frames */ > > + static const int framesToSkip = 30; > > + static const int framesToMeasure = 60; > > Frames-to-measure is 30, a name like lastFrameToMeasure might be a bit clearer. constexpr, and we need constants with a k prefix, e.g. kFramesToSkip. > > +}; > > + > > +} /* namespace libcamera */ > > diff --git a/include/libcamera/internal/software_isp/meson.build b/include/libcamera/internal/software_isp/meson.build > > index 7e40925e..b5a0d737 100644 > > --- a/include/libcamera/internal/software_isp/meson.build > > +++ b/include/libcamera/internal/software_isp/meson.build > > @@ -2,6 +2,7 @@ > > > > libcamera_internal_headers += files([ > > 'debayer.h', > > + 'debayer_cpu.h', > > 'debayer_params.h', > > 'swisp_stats.h', > > 'swstats.h', > > diff --git a/src/libcamera/software_isp/debayer_cpu.cpp b/src/libcamera/software_isp/debayer_cpu.cpp > > new file mode 100644 > > index 00000000..e0c3c658 > > --- /dev/null > > +++ b/src/libcamera/software_isp/debayer_cpu.cpp > > @@ -0,0 +1,528 @@ > > +/* SPDX-License-Identifier: LGPL-2.1-or-later */ > > +/* > > + * Copyright (C) 2023, Linaro Ltd > > + * Copyright (C) 2023, Red Hat Inc. > > + * > > + * Authors: > > + * Hans de Goede <hdegoede@redhat.com> > > + * > > + * debayer_cpu.cpp - CPU based debayering class > > + */ > > + > > +#include "libcamera/internal/software_isp/debayer_cpu.h" > > + > > +#include <math.h> > > +#include <stdlib.h> > > +#include <time.h> > > + > > +#include <libcamera/formats.h> > > + > > +#include "libcamera/internal/bayer_format.h" > > +#include "libcamera/internal/framebuffer.h" > > +#include "libcamera/internal/mapped_framebuffer.h" > > + > > +namespace libcamera { > > + > > +DebayerCpu::DebayerCpu(std::unique_ptr<SwStatsCpu> stats) > > + : stats_(std::move(stats)), gamma_correction_(1.0) > > +{ > > +#ifdef __x86_64__ > > + enableInputMemcpy_ = false; > > +#else > > + enableInputMemcpy_ = true; > > +#endif > > + /* Initialize gamma to 1.0 curve */ > > + for (int i = 0; i < 1024; i++) > > + gamma_[i] = i / 4; > > + > > + for (int i = 0; i < 5; i++) > > + lineBuffers_[i] = NULL; > > A question for C/C++ experts whether NULL or nullptr should be used here, with a > special consideration of free() calls below. nullptr, that's the C++ constant for null pointers. > > +} > > + > > +DebayerCpu::~DebayerCpu() > > +{ > > + for (int i = 0; i < 5; i++) > > + free(lineBuffers_[i]); > > +} > > + > > +// RGR > > +// GBG > > +// RGR > > +#define BGGR_BGR888(p, n, div) \ > > + *dst++ = blue_[curr[x] / (div)]; \ > > + *dst++ = green_[(prev[x] + curr[x - p] + curr[x + n] + next[x]) / (4 * (div))]; \ > > + *dst++ = red_[(prev[x - p] + prev[x + n] + next[x - p] + next[x + n]) / (4 * (div))]; \ > > + x++; > > + > > +// GBG > > +// RGR > > +// GBG > > +#define GRBG_BGR888(p, n, div) \ > > + *dst++ = blue_[(prev[x] + next[x]) / (2 * (div))]; \ > > + *dst++ = green_[curr[x] / (div)]; \ > > + *dst++ = red_[(curr[x - p] + curr[x + n]) / (2 * (div))]; \ > > + x++; > > + > > +// GRG > > +// BGB > > +// GRG > > +#define GBRG_BGR888(p, n, div) \ > > + *dst++ = blue_[(curr[x - p] + curr[x + n]) / (2 * (div))]; \ > > + *dst++ = green_[curr[x] / (div)]; \ > > + *dst++ = red_[(prev[x] + next[x]) / (2 * (div))]; \ > > + x++; > > + > > +// BGB > > +// GRG > > +// BGB > > +#define RGGB_BGR888(p, n, div) \ > > + *dst++ = blue_[(prev[x - p] + prev[x + n] + next[x - p] + next[x + n]) / (4 * (div))]; \ > > + *dst++ = green_[(prev[x] + curr[x - p] + curr[x + n] + next[x]) / (4 * (div))]; \ > > + *dst++ = red_[curr[x] / (div)]; \ > > + x++; > > + > > +void DebayerCpu::debayer10P_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]) > > +{ > > + const int width_in_bytes = window_.width * 5 / 4; > > + const uint8_t *prev = (const uint8_t *)src[0]; > > + const uint8_t *curr = (const uint8_t *)src[1]; > > + const uint8_t *next = (const uint8_t *)src[2]; > > + > > + /* > > + * For the first pixel getting a pixel from the previous column uses > > + * x - 2 to skip the 5th byte with least-significant bits for 4 pixels. > > + * Same for last pixel (uses x + 2) and looking at the next column. > > + * x++ in the for-loop skips the 5th byte with 4 x 2 lsb-s for 10bit packed. > > + */ > > + for (int x = 0; x < width_in_bytes; x++) { > > + /* Even pixel */ > > + BGGR_BGR888(2, 1, 1) > > + /* Odd pixel BGGR -> GBRG */ > > + GBRG_BGR888(1, 1, 1) > > + /* Same thing for next 2 pixels */ > > + BGGR_BGR888(1, 1, 1) > > + GBRG_BGR888(1, 2, 1) > > "Even" and "odd" is a bit confusing here. How about "first", "second", "third", "fourth"? > > > + } > > +} > > + > > +void DebayerCpu::debayer10P_GRGR_BGR888(uint8_t *dst, const uint8_t *src[]) > > +{ > > + const int width_in_bytes = window_.width * 5 / 4; > > + const uint8_t *prev = (const uint8_t *)src[0]; > > + const uint8_t *curr = (const uint8_t *)src[1]; > > + const uint8_t *next = (const uint8_t *)src[2]; > > + > > + for (int x = 0; x < width_in_bytes; x++) { > > + /* Even pixel */ > > + GRBG_BGR888(2, 1, 1) > > + /* Odd pixel GRBG -> RGGB */ > > + RGGB_BGR888(1, 1, 1) > > + /* Same thing for next 2 pixels */ > > + GRBG_BGR888(1, 1, 1) > > + RGGB_BGR888(1, 2, 1) > > + } > > +} > > + > > +void DebayerCpu::debayer10P_GBGB_BGR888(uint8_t *dst, const uint8_t *src[]) > > +{ > > + const int width_in_bytes = window_.width * 5 / 4; > > + const uint8_t *prev = (const uint8_t *)src[0]; > > + const uint8_t *curr = (const uint8_t *)src[1]; > > + const uint8_t *next = (const uint8_t *)src[2]; > > + > > + for (int x = 0; x < width_in_bytes; x++) { > > + /* Even pixel */ > > + GBRG_BGR888(2, 1, 1) > > + /* Odd pixel GBGR -> BGGR */ > > + BGGR_BGR888(1, 1, 1) > > + /* Same thing for next 2 pixels */ > > + GBRG_BGR888(1, 1, 1) > > + BGGR_BGR888(1, 2, 1) > > + } > > +} > > + > > +void DebayerCpu::debayer10P_RGRG_BGR888(uint8_t *dst, const uint8_t *src[]) > > +{ > > + const int width_in_bytes = window_.width * 5 / 4; > > + const uint8_t *prev = (const uint8_t *)src[0]; > > + const uint8_t *curr = (const uint8_t *)src[1]; > > + const uint8_t *next = (const uint8_t *)src[2]; > > + > > + for (int x = 0; x < width_in_bytes; x++) { > > + /* Even pixel */ > > + RGGB_BGR888(2, 1, 1) > > + /* Odd pixel RGGB -> GRBG*/ > > + GRBG_BGR888(1, 1, 1) > > + /* Same thing for next 2 pixels */ > > + RGGB_BGR888(1, 1, 1) > > + GRBG_BGR888(1, 2, 1) > > + } > > +} > > + > > +static bool isStandardBayerOrder(BayerFormat::Order order) > > +{ > > + return order == BayerFormat::BGGR || order == BayerFormat::GBRG || > > + order == BayerFormat::GRBG || order == BayerFormat::RGGB; > > +} > > + > > +int DebayerCpu::getInputConfig(PixelFormat inputFormat, DebayerInputConfig &config) > > +{ > > + BayerFormat bayerFormat = > > + BayerFormat::fromPixelFormat(inputFormat); > > + > > + if (bayerFormat.bitDepth == 10 && > > + bayerFormat.packing == BayerFormat::Packing::CSI2 && > > + isStandardBayerOrder(bayerFormat.order)) { > > + config.bpp = 10; > > + config.patternSize.width = 4; /* 5 bytes per *4* pixels */ > > + config.patternSize.height = 2; > > + config.outputFormats = std::vector<PixelFormat>({ formats::RGB888 }); > > + return 0; > > + } > > + > > + LOG(Debayer, Info) > > + << "Unsupported input format " << inputFormat.toString(); > > + return -EINVAL; > > +} > > + > > +int DebayerCpu::getOutputConfig(PixelFormat outputFormat, DebayerOutputConfig &config) > > +{ > > + if (outputFormat == formats::RGB888) { > > + config.bpp = 24; > > + return 0; > > + } > > + > > + LOG(Debayer, Info) > > + << "Unsupported output format " << outputFormat.toString(); > > + return -EINVAL; > > +} > > + > > +/* TODO: this ignores outputFormat since there is only 1 supported outputFormat for now */ > > +int DebayerCpu::setDebayerFunctions(PixelFormat inputFormat, [[maybe_unused]] PixelFormat outputFormat) > > +{ > > + BayerFormat bayerFormat = > > + BayerFormat::fromPixelFormat(inputFormat); > > + > > + if (bayerFormat.bitDepth == 10 && > > + bayerFormat.packing == BayerFormat::Packing::CSI2) { > > + switch (bayerFormat.order) { > > + case BayerFormat::BGGR: > > + debayer0_ = &DebayerCpu::debayer10P_BGBG_BGR888; > > + debayer1_ = &DebayerCpu::debayer10P_GRGR_BGR888; > > + return 0; > > + case BayerFormat::GBRG: > > + debayer0_ = &DebayerCpu::debayer10P_GBGB_BGR888; > > + debayer1_ = &DebayerCpu::debayer10P_RGRG_BGR888; > > + return 0; > > + case BayerFormat::GRBG: > > + debayer0_ = &DebayerCpu::debayer10P_GRGR_BGR888; > > + debayer1_ = &DebayerCpu::debayer10P_BGBG_BGR888; > > + return 0; > > + case BayerFormat::RGGB: > > + debayer0_ = &DebayerCpu::debayer10P_RGRG_BGR888; > > + debayer1_ = &DebayerCpu::debayer10P_GBGB_BGR888; > > + return 0; > > + default: > > + break; > > + } > > + } > > + > > + LOG(Debayer, Error) << "Unsupported input output format combination"; > > + return -EINVAL; > > +} > > + > > +int DebayerCpu::configure(const StreamConfiguration &inputCfg, > > + const std::vector<std::reference_wrapper<StreamConfiguration>> &outputCfgs) > > +{ > > + if (getInputConfig(inputCfg.pixelFormat, inputConfig_) != 0) > > + return -EINVAL; > > + > > + if (stats_->configure(inputCfg) != 0) > > + return -EINVAL; > > + > > + const Size &stats_pattern_size = stats_->patternSize(); > > + if (inputConfig_.patternSize.width != stats_pattern_size.width || > > + inputConfig_.patternSize.height != stats_pattern_size.height) { > > + LOG(Debayer, Error) > > + << "mismatching stats and debayer pattern sizes for " > > + << inputCfg.pixelFormat.toString(); > > + return -EINVAL; > > + } > > + > > + inputConfig_.stride = inputCfg.stride; > > + > > + if (outputCfgs.size() != 1) { > > + LOG(Debayer, Error) > > + << "Unsupported number of output streams: " > > + << outputCfgs.size(); > > + return -EINVAL; > > + } > > + > > + const StreamConfiguration &outputCfg = outputCfgs[0]; > > + SizeRange outSizeRange = sizes(inputCfg.pixelFormat, inputCfg.size); > > + std::tie(outputConfig_.stride, outputConfig_.frameSize) = > > + strideAndFrameSize(outputCfg.pixelFormat, outputCfg.size); > > + > > + if (!outSizeRange.contains(outputCfg.size) || outputConfig_.stride != outputCfg.stride) { > > + LOG(Debayer, Error) > > + << "Invalid output size/stride: " > > + << "\n " << outputCfg.size << " (" << outSizeRange << ")" > > + << "\n " << outputCfg.stride << " (" << outputConfig_.stride << ")"; > > + return -EINVAL; > > + } > > + > > + if (setDebayerFunctions(inputCfg.pixelFormat, outputCfg.pixelFormat) != 0) > > + return -EINVAL; > > + > > + window_.x = ((inputCfg.size.width - outputCfg.size.width) / 2) & > > + ~(inputConfig_.patternSize.width - 1); > > + window_.y = ((inputCfg.size.height - outputCfg.size.height) / 2) & > > + ~(inputConfig_.patternSize.height - 1); > > *Implicit* assumption that width and height are powers of 2? > > > + window_.width = outputCfg.size.width; > > + window_.height = outputCfg.size.height; > > + > > + /* Don't pass x,y since process() already adjusts src before passing it */ > > + stats_->setWindow(Rectangle(window_.size())); > > + > > + for (unsigned int i = 0; > > + i < (inputConfig_.patternSize.height + 1) && enableInputMemcpy_; > > + i++) { > > + /* pad with patternSize.Width on both left and right side */ > > + size_t lineLength = (window_.width + 2 * inputConfig_.patternSize.width) * > > + inputConfig_.bpp / 8; > > + > > + free(lineBuffers_[i]); > > + lineBuffers_[i] = (uint8_t *)malloc(lineLength); > > + if (!lineBuffers_[i]) > > + return -ENOMEM; > > What happens if malloc above fails and then free in the DebayerCpu destructor is > called? Make lineBuffers_ a std::array<std::vector<uint8_t>, 5> and you won't have memory leaks. > > + } > > + > > + measuredFrames_ = 0; > > + frameProcessTime_ = 0; > > + > > + return 0; > > +} > > + > > +Size DebayerCpu::patternSize(PixelFormat inputFormat) > > +{ > > + DebayerCpu::DebayerInputConfig config; > > + > > + if (getInputConfig(inputFormat, config) != 0) > > + return {}; > > + > > + return config.patternSize; > > +} > > + > > +std::vector<PixelFormat> DebayerCpu::formats(PixelFormat inputFormat) > > +{ > > + DebayerCpu::DebayerInputConfig config; > > + > > + if (getInputConfig(inputFormat, config) != 0) > > + return std::vector<PixelFormat>(); > > + > > + return config.outputFormats; > > +} > > + > > +std::tuple<unsigned int, unsigned int> > > +DebayerCpu::strideAndFrameSize(const PixelFormat &outputFormat, const Size &size) > > +{ > > + DebayerCpu::DebayerOutputConfig config; > > + > > + if (getOutputConfig(outputFormat, config) != 0) > > + return std::make_tuple(0, 0); > > + > > + /* round up to multiple of 8 for 64 bits alignment */ > > + unsigned int stride = (size.width * config.bpp / 8 + 7) & ~7; > > + > > + return std::make_tuple(stride, stride * size.height); > > +} > > + > > +void DebayerCpu::initLinePointers(const uint8_t *linePointers[], const uint8_t *src) > > +{ > > + const int patternHeight = inputConfig_.patternSize.height; > > + > > + for (int i = 0; i < patternHeight; i++) > > + linePointers[i + 1] = src + > > + (-patternHeight / 2 + i) * (int)inputConfig_.stride; > > + > > + if (!enableInputMemcpy_) > > + return; > > + > > + for (int i = 0; i < patternHeight; i++) { > > + /* pad with patternSize.Width on both left and right side */ > > + size_t lineLength = (window_.width + 2 * inputConfig_.patternSize.width) * > > + inputConfig_.bpp / 8; > > + int padding = inputConfig_.patternSize.width * inputConfig_.bpp / 8; > > The code/pattern duplication in lineLength and padding computation above and > below calls for inline helper functions. > > > + > > + memcpy(lineBuffers_[i], linePointers[i + 1] - padding, lineLength); > > + linePointers[i + 1] = lineBuffers_[i] + padding; > > + } > > + > > + /* Point lineBufferIndex_ to first unused lineBuffer */ > > + lineBufferIndex_ = patternHeight; > > Tracking all the memory locations here and checking their correctness is > difficult. Not sure how to make it easier to understand, maybe more local > variables or comments? > > > +} > > + > > +void DebayerCpu::shiftLinePointers(const uint8_t *linePointers[], const uint8_t *src) > > +{ > > + const int patternHeight = inputConfig_.patternSize.height; > > + > > + for (int i = 0; i < patternHeight; i++) > > + linePointers[i] = linePointers[i + 1]; > > + > > + linePointers[patternHeight] = src + > > + (patternHeight / 2) * (int)inputConfig_.stride; > > + > > + if (!enableInputMemcpy_) > > + return; > > + > > + size_t lineLength = (window_.width + 2 * inputConfig_.patternSize.width) * > > + inputConfig_.bpp / 8; > > + int padding = inputConfig_.patternSize.width * inputConfig_.bpp / 8; > > + memcpy(lineBuffers_[lineBufferIndex_], linePointers[patternHeight] - padding, lineLength); > > + linePointers[patternHeight] = lineBuffers_[lineBufferIndex_] + padding; > > + > > + lineBufferIndex_ = (lineBufferIndex_ + 1) % (patternHeight + 1); > > +} > > + > > +void DebayerCpu::process2(const uint8_t *src, uint8_t *dst) > > +{ > > + const unsigned int y_end = window_.y + window_.height; > > + const uint8_t *linePointers[3]; > > + > > + /* Adjust src to top left corner of the window */ > > + src += window_.y * inputConfig_.stride + window_.x * inputConfig_.bpp / 8; > > + > > + initLinePointers(linePointers, src); > > + > > + for (unsigned int y = window_.y; y < y_end; y += 2) { > > + shiftLinePointers(linePointers, src); > > + stats_->processLine0(y, linePointers); > > + (this->*debayer0_)(dst, linePointers); > > + src += inputConfig_.stride; > > + dst += outputConfig_.stride; > > + > > + shiftLinePointers(linePointers, src); > > + (this->*debayer1_)(dst, linePointers); > > + src += inputConfig_.stride; > > + dst += outputConfig_.stride; > > + } > > +} > > + > > +void DebayerCpu::process4(const uint8_t *src, uint8_t *dst) > > +{ > > + const unsigned int y_end = window_.y + window_.height; > > + const uint8_t *linePointers[5]; > > + > > + /* Adjust src to top left corner of the window */ > > + src += window_.y * inputConfig_.stride + window_.x * inputConfig_.bpp / 8; > > + > > + initLinePointers(linePointers, src); > > + > > + for (unsigned int y = window_.y; y < y_end; y += 4) { > > + shiftLinePointers(linePointers, src); > > + stats_->processLine0(y, linePointers); > > + (this->*debayer0_)(dst, linePointers); > > + src += inputConfig_.stride; > > + dst += outputConfig_.stride; > > + > > + shiftLinePointers(linePointers, src); > > + (this->*debayer1_)(dst, linePointers); > > + src += inputConfig_.stride; > > + dst += outputConfig_.stride; > > + > > + shiftLinePointers(linePointers, src); > > + stats_->processLine2(y, linePointers); > > + (this->*debayer2_)(dst, linePointers); > > + src += inputConfig_.stride; > > + dst += outputConfig_.stride; > > + > > + shiftLinePointers(linePointers, src); > > + (this->*debayer3_)(dst, linePointers); > > + src += inputConfig_.stride; > > + dst += outputConfig_.stride; > > + } > > +} > > + > > +static inline int64_t timeDiff(timespec &after, timespec &before) > > +{ > > + return (after.tv_sec - before.tv_sec) * 1000000000LL + > > + (int64_t)after.tv_nsec - (int64_t)before.tv_nsec; > > +} > > + > > +void DebayerCpu::process(FrameBuffer *input, FrameBuffer *output, DebayerParams params) > > +{ > > + timespec frameStartTime; > > + > > + if (measuredFrames_ < DebayerCpu::framesToMeasure) { > > + frameStartTime = {}; > > + clock_gettime(CLOCK_MONOTONIC_RAW, &frameStartTime); > > + } > > + > > + /* Apply DebayerParams */ > > + if (params.gamma != gamma_correction_) { > > + for (int i = 0; i < 1024; i++) > > + gamma_[i] = 255 * powf(i / 1023.0, params.gamma); > > + > > + gamma_correction_ = params.gamma; > > + } > > + > > + for (int i = 0; i < 256; i++) { > > + int idx; > > + > > + /* Apply gamma after gain! */ > > + idx = std::min({ i * params.gainR / 64U, 1023U }); > > + red_[i] = gamma_[idx]; > > + > > + idx = std::min({ i * params.gainG / 64U, 1023U }); > > + green_[i] = gamma_[idx]; > > + > > + idx = std::min({ i * params.gainB / 64U, 1023U }); > > + blue_[i] = gamma_[idx]; > > + } > > + > > + /* Copy metadata from the input buffer */ > > + FrameMetadata &metadata = output->_d()->metadata(); > > + metadata.status = input->metadata().status; > > + metadata.sequence = input->metadata().sequence; > > + metadata.timestamp = input->metadata().timestamp; > > + > > + MappedFrameBuffer in(input, MappedFrameBuffer::MapFlag::Read); > > + MappedFrameBuffer out(output, MappedFrameBuffer::MapFlag::Write); > > + if (!in.isValid() || !out.isValid()) { > > + LOG(Debayer, Error) << "mmap-ing buffer(s) failed"; > > + metadata.status = FrameMetadata::FrameError; > > + return; > > + } > > + > > + stats_->startFrame(); > > + > > + if (inputConfig_.patternSize.height == 2) > > + process2(in.planes()[0].data(), out.planes()[0].data()); > > + else > > + process4(in.planes()[0].data(), out.planes()[0].data()); > > + > > + metadata.planes()[0].bytesused = out.planes()[0].size(); > > + > > + /* Measure before emitting signals */ > > + if (measuredFrames_ < DebayerCpu::framesToMeasure && > > + ++measuredFrames_ > DebayerCpu::framesToSkip) { > > + timespec frameEndTime = {}; > > + clock_gettime(CLOCK_MONOTONIC_RAW, &frameEndTime); > > + frameProcessTime_ += timeDiff(frameEndTime, frameStartTime); > > + if (measuredFrames_ == DebayerCpu::framesToMeasure) { > > + const int measuredFrames = DebayerCpu::framesToMeasure - > > + DebayerCpu::framesToSkip; > > + LOG(Debayer, Info) > > + << "Processed " << measuredFrames > > + << " frames in " << frameProcessTime_ / 1000 << "us, " > > + << frameProcessTime_ / (1000 * measuredFrames) > > + << " us/frame"; > > + } > > + } > > + > > + stats_->finishFrame(); > > + outputBufferReady.emit(output); > > + inputBufferReady.emit(input); > > The thread-bound calls problem here, Andrei already works on it. > > > +} > > + > > +} /* namespace libcamera */ > > diff --git a/src/libcamera/software_isp/meson.build b/src/libcamera/software_isp/meson.build > > index d4ae5ac7..6d7a44d7 100644 > > --- a/src/libcamera/software_isp/meson.build > > +++ b/src/libcamera/software_isp/meson.build > > @@ -2,6 +2,7 @@ > > > > libcamera_sources += files([ > > 'debayer.cpp', > > + 'debayer_cpu.cpp', > > 'swstats.cpp', > > 'swstats_cpu.cpp', > > ])
Hi Milan, Thank you for the review I've addressed all your comments for v3. On 1/23/24 22:23, Milan Zamazal wrote: > Hans de Goede <hdegoede@redhat.com> writes: <snip> >> +int DebayerCpu::configure(const StreamConfiguration &inputCfg, >> + const std::vector<std::reference_wrapper<StreamConfiguration>> &outputCfgs) >> +{ >> + if (getInputConfig(inputCfg.pixelFormat, inputConfig_) != 0) >> + return -EINVAL; >> + >> + if (stats_->configure(inputCfg) != 0) >> + return -EINVAL; >> + >> + const Size &stats_pattern_size = stats_->patternSize(); >> + if (inputConfig_.patternSize.width != stats_pattern_size.width || >> + inputConfig_.patternSize.height != stats_pattern_size.height) { >> + LOG(Debayer, Error) >> + << "mismatching stats and debayer pattern sizes for " >> + << inputCfg.pixelFormat.toString(); >> + return -EINVAL; >> + } >> + >> + inputConfig_.stride = inputCfg.stride; >> + >> + if (outputCfgs.size() != 1) { >> + LOG(Debayer, Error) >> + << "Unsupported number of output streams: " >> + << outputCfgs.size(); >> + return -EINVAL; >> + } >> + >> + const StreamConfiguration &outputCfg = outputCfgs[0]; >> + SizeRange outSizeRange = sizes(inputCfg.pixelFormat, inputCfg.size); >> + std::tie(outputConfig_.stride, outputConfig_.frameSize) = >> strideAndFrameSize(outputCfg.pixelFormat, outputCfg.size); >> + >> + if (!outSizeRange.contains(outputCfg.size) || outputConfig_.stride != outputCfg.stride) { >> + LOG(Debayer, Error) >> + << "Invalid output size/stride: " >> + << "\n " << outputCfg.size << " (" << outSizeRange << ")" >> + << "\n " << outputCfg.stride << " (" << outputConfig_.stride << ")"; >> + return -EINVAL; >> + } >> + >> + if (setDebayerFunctions(inputCfg.pixelFormat, outputCfg.pixelFormat) != 0) >> + return -EINVAL; >> + >> + window_.x = ((inputCfg.size.width - outputCfg.size.width) / 2) & >> + ~(inputConfig_.patternSize.width - 1); >> + window_.y = ((inputCfg.size.height - outputCfg.size.height) / 2) & >> + ~(inputConfig_.patternSize.height - 1); > > *Implicit* assumption that width and height are powers of 2? > >> + window_.width = outputCfg.size.width; >> + window_.height = outputCfg.size.height; >> + >> + /* Don't pass x,y since process() already adjusts src before passing it */ >> + stats_->setWindow(Rectangle(window_.size())); >> + >> + for (unsigned int i = 0; >> + i < (inputConfig_.patternSize.height + 1) && enableInputMemcpy_; >> + i++) { >> + /* pad with patternSize.Width on both left and right side */ >> + size_t lineLength = (window_.width + 2 * inputConfig_.patternSize.width) * >> + inputConfig_.bpp / 8; >> + >> + free(lineBuffers_[i]); >> + lineBuffers_[i] = (uint8_t *)malloc(lineLength); >> + if (!lineBuffers_[i]) >> + return -ENOMEM; > > What happens if malloc above fails and then free in the DebayerCpu destructor is > called? free(nullptr) is a a no-op, so this scenario should not cause any problems. Regards, Hans
diff --git a/include/libcamera/internal/software_isp/debayer_cpu.h b/include/libcamera/internal/software_isp/debayer_cpu.h new file mode 100644 index 00000000..78573f44 --- /dev/null +++ b/include/libcamera/internal/software_isp/debayer_cpu.h @@ -0,0 +1,131 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +/* + * Copyright (C) 2023, Linaro Ltd + * Copyright (C) 2023, Red Hat Inc. + * + * Authors: + * Hans de Goede <hdegoede@redhat.com> + * + * debayer_cpu.h - CPU based debayering header + */ + +#pragma once + +#include <memory> +#include <stdint.h> +#include <vector> + +#include <libcamera/base/object.h> + +#include "libcamera/internal/software_isp/swstats_cpu.h" +#include "libcamera/internal/software_isp/debayer.h" + +namespace libcamera { + +/** + * \class DebayerCpu + * \brief Class for debayering on the CPU + * + * Implementation for CPU based debayering + */ +class DebayerCpu : public Debayer, public Object +{ +public: + /* + * FIXME this should be a plain (implementation independent) SwStats + * this can be fixed once getStats() is dropped. + */ + /** + * \brief Constructs a DebayerCpu object. + * \param[in] stats Pointer to the stats object to use. + */ + DebayerCpu(std::unique_ptr<SwStatsCpu> stats); + ~DebayerCpu(); + + /* + * Setup the Debayer object according to the passed in parameters. + * Return 0 on success, a negative errno value on failure + * (unsupported parameters). + */ + int configure(const StreamConfiguration &inputCfg, + const std::vector<std::reference_wrapper<StreamConfiguration>> &outputCfgs); + + /* + * Get width and height at which the bayer-pattern repeats. + * Return pattern-size or an empty Size for an unsupported inputFormat. + */ + Size patternSize(PixelFormat inputFormat); + + std::vector<PixelFormat> formats(PixelFormat input); + std::tuple<unsigned int, unsigned int> + strideAndFrameSize(const PixelFormat &outputFormat, const Size &size); + + void process(FrameBuffer *input, FrameBuffer *output, DebayerParams params); + + /** + * \brief Get the file descriptor for the statistics. + * + * \return the file descriptor pointing to the statistics. + */ + const SharedFD &getStatsFD() { return stats_->getStatsFD(); } + + /** + * \brief Get the output frame size. + * + * \return The output frame size. + */ + unsigned int frameSize() { return outputConfig_.frameSize; } +private: + void initLinePointers(const uint8_t *linePointers[], const uint8_t *src); + void shiftLinePointers(const uint8_t *linePointers[], const uint8_t *src); + void process2(const uint8_t *src, uint8_t *dst); + void process4(const uint8_t *src, uint8_t *dst); + /* CSI-2 packed 10-bit raw bayer format (all the 4 orders) */ + void debayer10P_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]); + void debayer10P_GRGR_BGR888(uint8_t *dst, const uint8_t *src[]); + void debayer10P_GBGB_BGR888(uint8_t *dst, const uint8_t *src[]); + void debayer10P_RGRG_BGR888(uint8_t *dst, const uint8_t *src[]); + + typedef void (DebayerCpu::*debayerFn)(uint8_t *dst, const uint8_t *src[]); + + struct DebayerInputConfig { + Size patternSize; + unsigned int bpp; /* Memory used per pixel, not precision */ + unsigned int stride; + std::vector<PixelFormat> outputFormats; + }; + + struct DebayerOutputConfig { + unsigned int bpp; /* Memory used per pixel, not precision */ + unsigned int stride; + unsigned int frameSize; + }; + + int getInputConfig(PixelFormat inputFormat, DebayerInputConfig &config); + int getOutputConfig(PixelFormat outputFormat, DebayerOutputConfig &config); + int setDebayerFunctions(PixelFormat inputFormat, PixelFormat outputFormat); + + uint8_t gamma_[1024]; + uint8_t red_[256]; + uint8_t green_[256]; + uint8_t blue_[256]; + debayerFn debayer0_; + debayerFn debayer1_; + debayerFn debayer2_; + debayerFn debayer3_; + Rectangle window_; + DebayerInputConfig inputConfig_; + DebayerOutputConfig outputConfig_; + std::unique_ptr<SwStatsCpu> stats_; + uint8_t *lineBuffers_[5]; + unsigned int lineBufferIndex_; + bool enableInputMemcpy_; + float gamma_correction_; + int measuredFrames_; + int64_t frameProcessTime_; + /* Skip 30 frames for things to stabilize then measure 30 frames */ + static const int framesToSkip = 30; + static const int framesToMeasure = 60; +}; + +} /* namespace libcamera */ diff --git a/include/libcamera/internal/software_isp/meson.build b/include/libcamera/internal/software_isp/meson.build index 7e40925e..b5a0d737 100644 --- a/include/libcamera/internal/software_isp/meson.build +++ b/include/libcamera/internal/software_isp/meson.build @@ -2,6 +2,7 @@ libcamera_internal_headers += files([ 'debayer.h', + 'debayer_cpu.h', 'debayer_params.h', 'swisp_stats.h', 'swstats.h', diff --git a/src/libcamera/software_isp/debayer_cpu.cpp b/src/libcamera/software_isp/debayer_cpu.cpp new file mode 100644 index 00000000..e0c3c658 --- /dev/null +++ b/src/libcamera/software_isp/debayer_cpu.cpp @@ -0,0 +1,528 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +/* + * Copyright (C) 2023, Linaro Ltd + * Copyright (C) 2023, Red Hat Inc. + * + * Authors: + * Hans de Goede <hdegoede@redhat.com> + * + * debayer_cpu.cpp - CPU based debayering class + */ + +#include "libcamera/internal/software_isp/debayer_cpu.h" + +#include <math.h> +#include <stdlib.h> +#include <time.h> + +#include <libcamera/formats.h> + +#include "libcamera/internal/bayer_format.h" +#include "libcamera/internal/framebuffer.h" +#include "libcamera/internal/mapped_framebuffer.h" + +namespace libcamera { + +DebayerCpu::DebayerCpu(std::unique_ptr<SwStatsCpu> stats) + : stats_(std::move(stats)), gamma_correction_(1.0) +{ +#ifdef __x86_64__ + enableInputMemcpy_ = false; +#else + enableInputMemcpy_ = true; +#endif + /* Initialize gamma to 1.0 curve */ + for (int i = 0; i < 1024; i++) + gamma_[i] = i / 4; + + for (int i = 0; i < 5; i++) + lineBuffers_[i] = NULL; +} + +DebayerCpu::~DebayerCpu() +{ + for (int i = 0; i < 5; i++) + free(lineBuffers_[i]); +} + +// RGR +// GBG +// RGR +#define BGGR_BGR888(p, n, div) \ + *dst++ = blue_[curr[x] / (div)]; \ + *dst++ = green_[(prev[x] + curr[x - p] + curr[x + n] + next[x]) / (4 * (div))]; \ + *dst++ = red_[(prev[x - p] + prev[x + n] + next[x - p] + next[x + n]) / (4 * (div))]; \ + x++; + +// GBG +// RGR +// GBG +#define GRBG_BGR888(p, n, div) \ + *dst++ = blue_[(prev[x] + next[x]) / (2 * (div))]; \ + *dst++ = green_[curr[x] / (div)]; \ + *dst++ = red_[(curr[x - p] + curr[x + n]) / (2 * (div))]; \ + x++; + +// GRG +// BGB +// GRG +#define GBRG_BGR888(p, n, div) \ + *dst++ = blue_[(curr[x - p] + curr[x + n]) / (2 * (div))]; \ + *dst++ = green_[curr[x] / (div)]; \ + *dst++ = red_[(prev[x] + next[x]) / (2 * (div))]; \ + x++; + +// BGB +// GRG +// BGB +#define RGGB_BGR888(p, n, div) \ + *dst++ = blue_[(prev[x - p] + prev[x + n] + next[x - p] + next[x + n]) / (4 * (div))]; \ + *dst++ = green_[(prev[x] + curr[x - p] + curr[x + n] + next[x]) / (4 * (div))]; \ + *dst++ = red_[curr[x] / (div)]; \ + x++; + +void DebayerCpu::debayer10P_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]) +{ + const int width_in_bytes = window_.width * 5 / 4; + const uint8_t *prev = (const uint8_t *)src[0]; + const uint8_t *curr = (const uint8_t *)src[1]; + const uint8_t *next = (const uint8_t *)src[2]; + + /* + * For the first pixel getting a pixel from the previous column uses + * x - 2 to skip the 5th byte with least-significant bits for 4 pixels. + * Same for last pixel (uses x + 2) and looking at the next column. + * x++ in the for-loop skips the 5th byte with 4 x 2 lsb-s for 10bit packed. + */ + for (int x = 0; x < width_in_bytes; x++) { + /* Even pixel */ + BGGR_BGR888(2, 1, 1) + /* Odd pixel BGGR -> GBRG */ + GBRG_BGR888(1, 1, 1) + /* Same thing for next 2 pixels */ + BGGR_BGR888(1, 1, 1) + GBRG_BGR888(1, 2, 1) + } +} + +void DebayerCpu::debayer10P_GRGR_BGR888(uint8_t *dst, const uint8_t *src[]) +{ + const int width_in_bytes = window_.width * 5 / 4; + const uint8_t *prev = (const uint8_t *)src[0]; + const uint8_t *curr = (const uint8_t *)src[1]; + const uint8_t *next = (const uint8_t *)src[2]; + + for (int x = 0; x < width_in_bytes; x++) { + /* Even pixel */ + GRBG_BGR888(2, 1, 1) + /* Odd pixel GRBG -> RGGB */ + RGGB_BGR888(1, 1, 1) + /* Same thing for next 2 pixels */ + GRBG_BGR888(1, 1, 1) + RGGB_BGR888(1, 2, 1) + } +} + +void DebayerCpu::debayer10P_GBGB_BGR888(uint8_t *dst, const uint8_t *src[]) +{ + const int width_in_bytes = window_.width * 5 / 4; + const uint8_t *prev = (const uint8_t *)src[0]; + const uint8_t *curr = (const uint8_t *)src[1]; + const uint8_t *next = (const uint8_t *)src[2]; + + for (int x = 0; x < width_in_bytes; x++) { + /* Even pixel */ + GBRG_BGR888(2, 1, 1) + /* Odd pixel GBGR -> BGGR */ + BGGR_BGR888(1, 1, 1) + /* Same thing for next 2 pixels */ + GBRG_BGR888(1, 1, 1) + BGGR_BGR888(1, 2, 1) + } +} + +void DebayerCpu::debayer10P_RGRG_BGR888(uint8_t *dst, const uint8_t *src[]) +{ + const int width_in_bytes = window_.width * 5 / 4; + const uint8_t *prev = (const uint8_t *)src[0]; + const uint8_t *curr = (const uint8_t *)src[1]; + const uint8_t *next = (const uint8_t *)src[2]; + + for (int x = 0; x < width_in_bytes; x++) { + /* Even pixel */ + RGGB_BGR888(2, 1, 1) + /* Odd pixel RGGB -> GRBG*/ + GRBG_BGR888(1, 1, 1) + /* Same thing for next 2 pixels */ + RGGB_BGR888(1, 1, 1) + GRBG_BGR888(1, 2, 1) + } +} + +static bool isStandardBayerOrder(BayerFormat::Order order) +{ + return order == BayerFormat::BGGR || order == BayerFormat::GBRG || + order == BayerFormat::GRBG || order == BayerFormat::RGGB; +} + +int DebayerCpu::getInputConfig(PixelFormat inputFormat, DebayerInputConfig &config) +{ + BayerFormat bayerFormat = + BayerFormat::fromPixelFormat(inputFormat); + + if (bayerFormat.bitDepth == 10 && + bayerFormat.packing == BayerFormat::Packing::CSI2 && + isStandardBayerOrder(bayerFormat.order)) { + config.bpp = 10; + config.patternSize.width = 4; /* 5 bytes per *4* pixels */ + config.patternSize.height = 2; + config.outputFormats = std::vector<PixelFormat>({ formats::RGB888 }); + return 0; + } + + LOG(Debayer, Info) + << "Unsupported input format " << inputFormat.toString(); + return -EINVAL; +} + +int DebayerCpu::getOutputConfig(PixelFormat outputFormat, DebayerOutputConfig &config) +{ + if (outputFormat == formats::RGB888) { + config.bpp = 24; + return 0; + } + + LOG(Debayer, Info) + << "Unsupported output format " << outputFormat.toString(); + return -EINVAL; +} + +/* TODO: this ignores outputFormat since there is only 1 supported outputFormat for now */ +int DebayerCpu::setDebayerFunctions(PixelFormat inputFormat, [[maybe_unused]] PixelFormat outputFormat) +{ + BayerFormat bayerFormat = + BayerFormat::fromPixelFormat(inputFormat); + + if (bayerFormat.bitDepth == 10 && + bayerFormat.packing == BayerFormat::Packing::CSI2) { + switch (bayerFormat.order) { + case BayerFormat::BGGR: + debayer0_ = &DebayerCpu::debayer10P_BGBG_BGR888; + debayer1_ = &DebayerCpu::debayer10P_GRGR_BGR888; + return 0; + case BayerFormat::GBRG: + debayer0_ = &DebayerCpu::debayer10P_GBGB_BGR888; + debayer1_ = &DebayerCpu::debayer10P_RGRG_BGR888; + return 0; + case BayerFormat::GRBG: + debayer0_ = &DebayerCpu::debayer10P_GRGR_BGR888; + debayer1_ = &DebayerCpu::debayer10P_BGBG_BGR888; + return 0; + case BayerFormat::RGGB: + debayer0_ = &DebayerCpu::debayer10P_RGRG_BGR888; + debayer1_ = &DebayerCpu::debayer10P_GBGB_BGR888; + return 0; + default: + break; + } + } + + LOG(Debayer, Error) << "Unsupported input output format combination"; + return -EINVAL; +} + +int DebayerCpu::configure(const StreamConfiguration &inputCfg, + const std::vector<std::reference_wrapper<StreamConfiguration>> &outputCfgs) +{ + if (getInputConfig(inputCfg.pixelFormat, inputConfig_) != 0) + return -EINVAL; + + if (stats_->configure(inputCfg) != 0) + return -EINVAL; + + const Size &stats_pattern_size = stats_->patternSize(); + if (inputConfig_.patternSize.width != stats_pattern_size.width || + inputConfig_.patternSize.height != stats_pattern_size.height) { + LOG(Debayer, Error) + << "mismatching stats and debayer pattern sizes for " + << inputCfg.pixelFormat.toString(); + return -EINVAL; + } + + inputConfig_.stride = inputCfg.stride; + + if (outputCfgs.size() != 1) { + LOG(Debayer, Error) + << "Unsupported number of output streams: " + << outputCfgs.size(); + return -EINVAL; + } + + const StreamConfiguration &outputCfg = outputCfgs[0]; + SizeRange outSizeRange = sizes(inputCfg.pixelFormat, inputCfg.size); + std::tie(outputConfig_.stride, outputConfig_.frameSize) = + strideAndFrameSize(outputCfg.pixelFormat, outputCfg.size); + + if (!outSizeRange.contains(outputCfg.size) || outputConfig_.stride != outputCfg.stride) { + LOG(Debayer, Error) + << "Invalid output size/stride: " + << "\n " << outputCfg.size << " (" << outSizeRange << ")" + << "\n " << outputCfg.stride << " (" << outputConfig_.stride << ")"; + return -EINVAL; + } + + if (setDebayerFunctions(inputCfg.pixelFormat, outputCfg.pixelFormat) != 0) + return -EINVAL; + + window_.x = ((inputCfg.size.width - outputCfg.size.width) / 2) & + ~(inputConfig_.patternSize.width - 1); + window_.y = ((inputCfg.size.height - outputCfg.size.height) / 2) & + ~(inputConfig_.patternSize.height - 1); + window_.width = outputCfg.size.width; + window_.height = outputCfg.size.height; + + /* Don't pass x,y since process() already adjusts src before passing it */ + stats_->setWindow(Rectangle(window_.size())); + + for (unsigned int i = 0; + i < (inputConfig_.patternSize.height + 1) && enableInputMemcpy_; + i++) { + /* pad with patternSize.Width on both left and right side */ + size_t lineLength = (window_.width + 2 * inputConfig_.patternSize.width) * + inputConfig_.bpp / 8; + + free(lineBuffers_[i]); + lineBuffers_[i] = (uint8_t *)malloc(lineLength); + if (!lineBuffers_[i]) + return -ENOMEM; + } + + measuredFrames_ = 0; + frameProcessTime_ = 0; + + return 0; +} + +Size DebayerCpu::patternSize(PixelFormat inputFormat) +{ + DebayerCpu::DebayerInputConfig config; + + if (getInputConfig(inputFormat, config) != 0) + return {}; + + return config.patternSize; +} + +std::vector<PixelFormat> DebayerCpu::formats(PixelFormat inputFormat) +{ + DebayerCpu::DebayerInputConfig config; + + if (getInputConfig(inputFormat, config) != 0) + return std::vector<PixelFormat>(); + + return config.outputFormats; +} + +std::tuple<unsigned int, unsigned int> +DebayerCpu::strideAndFrameSize(const PixelFormat &outputFormat, const Size &size) +{ + DebayerCpu::DebayerOutputConfig config; + + if (getOutputConfig(outputFormat, config) != 0) + return std::make_tuple(0, 0); + + /* round up to multiple of 8 for 64 bits alignment */ + unsigned int stride = (size.width * config.bpp / 8 + 7) & ~7; + + return std::make_tuple(stride, stride * size.height); +} + +void DebayerCpu::initLinePointers(const uint8_t *linePointers[], const uint8_t *src) +{ + const int patternHeight = inputConfig_.patternSize.height; + + for (int i = 0; i < patternHeight; i++) + linePointers[i + 1] = src + + (-patternHeight / 2 + i) * (int)inputConfig_.stride; + + if (!enableInputMemcpy_) + return; + + for (int i = 0; i < patternHeight; i++) { + /* pad with patternSize.Width on both left and right side */ + size_t lineLength = (window_.width + 2 * inputConfig_.patternSize.width) * + inputConfig_.bpp / 8; + int padding = inputConfig_.patternSize.width * inputConfig_.bpp / 8; + + memcpy(lineBuffers_[i], linePointers[i + 1] - padding, lineLength); + linePointers[i + 1] = lineBuffers_[i] + padding; + } + + /* Point lineBufferIndex_ to first unused lineBuffer */ + lineBufferIndex_ = patternHeight; +} + +void DebayerCpu::shiftLinePointers(const uint8_t *linePointers[], const uint8_t *src) +{ + const int patternHeight = inputConfig_.patternSize.height; + + for (int i = 0; i < patternHeight; i++) + linePointers[i] = linePointers[i + 1]; + + linePointers[patternHeight] = src + + (patternHeight / 2) * (int)inputConfig_.stride; + + if (!enableInputMemcpy_) + return; + + size_t lineLength = (window_.width + 2 * inputConfig_.patternSize.width) * + inputConfig_.bpp / 8; + int padding = inputConfig_.patternSize.width * inputConfig_.bpp / 8; + memcpy(lineBuffers_[lineBufferIndex_], linePointers[patternHeight] - padding, lineLength); + linePointers[patternHeight] = lineBuffers_[lineBufferIndex_] + padding; + + lineBufferIndex_ = (lineBufferIndex_ + 1) % (patternHeight + 1); +} + +void DebayerCpu::process2(const uint8_t *src, uint8_t *dst) +{ + const unsigned int y_end = window_.y + window_.height; + const uint8_t *linePointers[3]; + + /* Adjust src to top left corner of the window */ + src += window_.y * inputConfig_.stride + window_.x * inputConfig_.bpp / 8; + + initLinePointers(linePointers, src); + + for (unsigned int y = window_.y; y < y_end; y += 2) { + shiftLinePointers(linePointers, src); + stats_->processLine0(y, linePointers); + (this->*debayer0_)(dst, linePointers); + src += inputConfig_.stride; + dst += outputConfig_.stride; + + shiftLinePointers(linePointers, src); + (this->*debayer1_)(dst, linePointers); + src += inputConfig_.stride; + dst += outputConfig_.stride; + } +} + +void DebayerCpu::process4(const uint8_t *src, uint8_t *dst) +{ + const unsigned int y_end = window_.y + window_.height; + const uint8_t *linePointers[5]; + + /* Adjust src to top left corner of the window */ + src += window_.y * inputConfig_.stride + window_.x * inputConfig_.bpp / 8; + + initLinePointers(linePointers, src); + + for (unsigned int y = window_.y; y < y_end; y += 4) { + shiftLinePointers(linePointers, src); + stats_->processLine0(y, linePointers); + (this->*debayer0_)(dst, linePointers); + src += inputConfig_.stride; + dst += outputConfig_.stride; + + shiftLinePointers(linePointers, src); + (this->*debayer1_)(dst, linePointers); + src += inputConfig_.stride; + dst += outputConfig_.stride; + + shiftLinePointers(linePointers, src); + stats_->processLine2(y, linePointers); + (this->*debayer2_)(dst, linePointers); + src += inputConfig_.stride; + dst += outputConfig_.stride; + + shiftLinePointers(linePointers, src); + (this->*debayer3_)(dst, linePointers); + src += inputConfig_.stride; + dst += outputConfig_.stride; + } +} + +static inline int64_t timeDiff(timespec &after, timespec &before) +{ + return (after.tv_sec - before.tv_sec) * 1000000000LL + + (int64_t)after.tv_nsec - (int64_t)before.tv_nsec; +} + +void DebayerCpu::process(FrameBuffer *input, FrameBuffer *output, DebayerParams params) +{ + timespec frameStartTime; + + if (measuredFrames_ < DebayerCpu::framesToMeasure) { + frameStartTime = {}; + clock_gettime(CLOCK_MONOTONIC_RAW, &frameStartTime); + } + + /* Apply DebayerParams */ + if (params.gamma != gamma_correction_) { + for (int i = 0; i < 1024; i++) + gamma_[i] = 255 * powf(i / 1023.0, params.gamma); + + gamma_correction_ = params.gamma; + } + + for (int i = 0; i < 256; i++) { + int idx; + + /* Apply gamma after gain! */ + idx = std::min({ i * params.gainR / 64U, 1023U }); + red_[i] = gamma_[idx]; + + idx = std::min({ i * params.gainG / 64U, 1023U }); + green_[i] = gamma_[idx]; + + idx = std::min({ i * params.gainB / 64U, 1023U }); + blue_[i] = gamma_[idx]; + } + + /* Copy metadata from the input buffer */ + FrameMetadata &metadata = output->_d()->metadata(); + metadata.status = input->metadata().status; + metadata.sequence = input->metadata().sequence; + metadata.timestamp = input->metadata().timestamp; + + MappedFrameBuffer in(input, MappedFrameBuffer::MapFlag::Read); + MappedFrameBuffer out(output, MappedFrameBuffer::MapFlag::Write); + if (!in.isValid() || !out.isValid()) { + LOG(Debayer, Error) << "mmap-ing buffer(s) failed"; + metadata.status = FrameMetadata::FrameError; + return; + } + + stats_->startFrame(); + + if (inputConfig_.patternSize.height == 2) + process2(in.planes()[0].data(), out.planes()[0].data()); + else + process4(in.planes()[0].data(), out.planes()[0].data()); + + metadata.planes()[0].bytesused = out.planes()[0].size(); + + /* Measure before emitting signals */ + if (measuredFrames_ < DebayerCpu::framesToMeasure && + ++measuredFrames_ > DebayerCpu::framesToSkip) { + timespec frameEndTime = {}; + clock_gettime(CLOCK_MONOTONIC_RAW, &frameEndTime); + frameProcessTime_ += timeDiff(frameEndTime, frameStartTime); + if (measuredFrames_ == DebayerCpu::framesToMeasure) { + const int measuredFrames = DebayerCpu::framesToMeasure - + DebayerCpu::framesToSkip; + LOG(Debayer, Info) + << "Processed " << measuredFrames + << " frames in " << frameProcessTime_ / 1000 << "us, " + << frameProcessTime_ / (1000 * measuredFrames) + << " us/frame"; + } + } + + stats_->finishFrame(); + outputBufferReady.emit(output); + inputBufferReady.emit(input); +} + +} /* namespace libcamera */ diff --git a/src/libcamera/software_isp/meson.build b/src/libcamera/software_isp/meson.build index d4ae5ac7..6d7a44d7 100644 --- a/src/libcamera/software_isp/meson.build +++ b/src/libcamera/software_isp/meson.build @@ -2,6 +2,7 @@ libcamera_sources += files([ 'debayer.cpp', + 'debayer_cpu.cpp', 'swstats.cpp', 'swstats_cpu.cpp', ])