[{"id":28674,"web_url":"https://patchwork.libcamera.org/comment/28674/","msgid":"<87bk8hd9ng.fsf@redhat.com>","date":"2024-02-15T15:33:55","subject":"Re: [PATCH v3 08/16] libcamera: software_isp: Add DebayerCpu class","submitter":{"id":177,"url":"https://patchwork.libcamera.org/api/people/177/","name":"Milan Zamazal","email":"mzamazal@redhat.com"},"content":"Hans de Goede <hdegoede@redhat.com> writes:\n\n> Add CPU based debayering implementation. This initial implementation\n> only supports debayering packed 10 bits per pixel bayer data in\n> the 4 standard bayer orders.\n>\n> Doxygen documentation by Dennis Bonke.\n>\n> Tested-by: Bryan O'Donoghue <bryan.odonoghue@linaro.org> # sc8280xp Lenovo x13s\n> Tested-by: Pavel Machek <pavel@ucw.cz>\n> Reviewed-by: Pavel Machek <pavel@ucw.cz>\n> Co-developed-by: Dennis Bonke <admin@dennisbonke.com>\n> Signed-off-by: Dennis Bonke <admin@dennisbonke.com>\n> Co-developed-by: Andrey Konovalov <andrey.konovalov@linaro.org>\n> Signed-off-by: Andrey Konovalov <andrey.konovalov@linaro.org>\n> Co-developed-by: Pavel Machek <pavel@ucw.cz>\n> Signed-off-by: Pavel Machek <pavel@ucw.cz>\n> Signed-off-by: Hans de Goede <hdegoede@redhat.com>\n> ---\n> Changes in v3:\n> - Move debayer_cpu.h to src/libcamera/software_isp/\n> - Move documentation to .cpp file\n> - Document how/why an array of src pointers is passed to\n>   the debayer functions\n> ---\n>  src/libcamera/software_isp/debayer_cpu.cpp | 619 +++++++++++++++++++++\n>  src/libcamera/software_isp/debayer_cpu.h   | 143 +++++\n>  src/libcamera/software_isp/meson.build     |   1 +\n>  3 files changed, 763 insertions(+)\n>  create mode 100644 src/libcamera/software_isp/debayer_cpu.cpp\n>  create mode 100644 src/libcamera/software_isp/debayer_cpu.h\n>\n> diff --git a/src/libcamera/software_isp/debayer_cpu.cpp b/src/libcamera/software_isp/debayer_cpu.cpp\n> new file mode 100644\n> index 00000000..53e90776\n> --- /dev/null\n> +++ b/src/libcamera/software_isp/debayer_cpu.cpp\n> @@ -0,0 +1,619 @@\n> +/* SPDX-License-Identifier: LGPL-2.1-or-later */\n> +/*\n> + * Copyright (C) 2023, Linaro Ltd\n> + * Copyright (C) 2023, Red Hat Inc.\n> + *\n> + * Authors:\n> + * Hans de Goede <hdegoede@redhat.com>\n> + *\n> + * debayer_cpu.cpp - CPU based debayering class\n> + */\n> +\n> +#include \"debayer_cpu.h\"\n> +\n> +#include <math.h>\n> +#include <stdlib.h>\n> +#include <time.h>\n> +\n> +#include <libcamera/formats.h>\n> +\n> +#include \"libcamera/internal/bayer_format.h\"\n> +#include \"libcamera/internal/framebuffer.h\"\n> +#include \"libcamera/internal/mapped_framebuffer.h\"\n> +\n> +namespace libcamera {\n> +\n> +/**\n> + * \\class DebayerCpu\n> + * \\brief Class for debayering on the CPU\n> + *\n> + * Implementation for CPU based debayering\n> + */\n> +\n> +/**\n> + * \\brief Constructs a DebayerCpu object.\n> + * \\param[in] stats Pointer to the stats object to use.\n> + */\n> +DebayerCpu::DebayerCpu(std::unique_ptr<SwStatsCpu> stats)\n> +\t: stats_(std::move(stats)), gamma_correction_(1.0)\n> +{\n> +#ifdef __x86_64__\n> +\tenableInputMemcpy_ = false;\n> +#else\n> +\tenableInputMemcpy_ = true;\n> +#endif\n> +\t/* Initialize gamma to 1.0 curve */\n> +\tfor (unsigned int i = 0; i < kGammaLookupSize; i++)\n> +\t\tgamma_[i] = i / 4;\n> +\n> +\tfor (unsigned int i = 0; i < kMaxLineBuffers; i++)\n> +\t\tlineBuffers_[i] = nullptr;\n> +}\n> +\n> +DebayerCpu::~DebayerCpu()\n> +{\n> +\tfor (unsigned int i = 0; i < kMaxLineBuffers; i++)\n> +\t\tfree(lineBuffers_[i]);\n> +}\n> +\n> +// RGR\n> +// GBG\n> +// RGR\n> +#define BGGR_BGR888(p, n, div)                                                                \\\n> +\t*dst++ = blue_[curr[x] / (div)];                                                      \\\n> +\t*dst++ = green_[(prev[x] + curr[x - p] + curr[x + n] + next[x]) / (4 * (div))];       \\\n> +\t*dst++ = red_[(prev[x - p] + prev[x + n] + next[x - p] + next[x + n]) / (4 * (div))]; \\\n> +\tx++;\n> +\n> +// GBG\n> +// RGR\n> +// GBG\n> +#define GRBG_BGR888(p, n, div)                                    \\\n> +\t*dst++ = blue_[(prev[x] + next[x]) / (2 * (div))];        \\\n> +\t*dst++ = green_[curr[x] / (div)];                         \\\n> +\t*dst++ = red_[(curr[x - p] + curr[x + n]) / (2 * (div))]; \\\n> +\tx++;\n> +\n> +// GRG\n> +// BGB\n> +// GRG\n> +#define GBRG_BGR888(p, n, div)                                     \\\n> +\t*dst++ = blue_[(curr[x - p] + curr[x + n]) / (2 * (div))]; \\\n> +\t*dst++ = green_[curr[x] / (div)];                          \\\n> +\t*dst++ = red_[(prev[x] + next[x]) / (2 * (div))];          \\\n> +\tx++;\n> +\n> +// BGB\n> +// GRG\n> +// BGB\n> +#define RGGB_BGR888(p, n, div)                                                                 \\\n> +\t*dst++ = blue_[(prev[x - p] + prev[x + n] + next[x - p] + next[x + n]) / (4 * (div))]; \\\n> +\t*dst++ = green_[(prev[x] + curr[x - p] + curr[x + n] + next[x]) / (4 * (div))];        \\\n> +\t*dst++ = red_[curr[x] / (div)];                                                        \\\n> +\tx++;\n> +\n> +void DebayerCpu::debayer10P_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])\n> +{\n> +\tconst int width_in_bytes = window_.width * 5 / 4;\n> +\tconst uint8_t *prev = (const uint8_t *)src[0];\n> +\tconst uint8_t *curr = (const uint8_t *)src[1];\n> +\tconst uint8_t *next = (const uint8_t *)src[2];\n> +\n> +\t/*\n> +\t * For the first pixel getting a pixel from the previous column uses\n> +\t * x - 2 to skip the 5th byte with least-significant bits for 4 pixels.\n> +\t * Same for last pixel (uses x + 2) and looking at the next column.\n> +\t * x++ in the for-loop skips the 5th byte with 4 x 2 lsb-s for 10bit packed.\n> +\t */\n> +\tfor (int x = 0; x < width_in_bytes; x++) {\n> +\t\t/* First pixel */\n> +\t\tBGGR_BGR888(2, 1, 1)\n> +\t\t/* Second pixel BGGR -> GBRG */\n> +\t\tGBRG_BGR888(1, 1, 1)\n> +\t\t/* Same thing for third and fourth pixels */\n> +\t\tBGGR_BGR888(1, 1, 1)\n> +\t\tGBRG_BGR888(1, 2, 1)\n> +\t}\n> +}\n> +\n> +void DebayerCpu::debayer10P_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])\n> +{\n> +\tconst int width_in_bytes = window_.width * 5 / 4;\n> +\tconst uint8_t *prev = (const uint8_t *)src[0];\n> +\tconst uint8_t *curr = (const uint8_t *)src[1];\n> +\tconst uint8_t *next = (const uint8_t *)src[2];\n> +\n> +\tfor (int x = 0; x < width_in_bytes; x++) {\n> +\t\t/* First pixel */\n> +\t\tGRBG_BGR888(2, 1, 1)\n> +\t\t/* Second pixel GRBG -> RGGB */\n> +\t\tRGGB_BGR888(1, 1, 1)\n> +\t\t/* Same thing for third and fourth pixels */\n> +\t\tGRBG_BGR888(1, 1, 1)\n> +\t\tRGGB_BGR888(1, 2, 1)\n> +\t}\n> +}\n> +\n> +void DebayerCpu::debayer10P_GBGB_BGR888(uint8_t *dst, const uint8_t *src[])\n> +{\n> +\tconst int width_in_bytes = window_.width * 5 / 4;\n> +\tconst uint8_t *prev = (const uint8_t *)src[0];\n> +\tconst uint8_t *curr = (const uint8_t *)src[1];\n> +\tconst uint8_t *next = (const uint8_t *)src[2];\n> +\n> +\tfor (int x = 0; x < width_in_bytes; x++) {\n> +\t\t/* Even pixel */\n> +\t\tGBRG_BGR888(2, 1, 1)\n> +\t\t/* Odd pixel GBGR -> BGGR */\n> +\t\tBGGR_BGR888(1, 1, 1)\n> +\t\t/* Same thing for next 2 pixels */\n> +\t\tGBRG_BGR888(1, 1, 1)\n> +\t\tBGGR_BGR888(1, 2, 1)\n> +\t}\n> +}\n> +\n> +void DebayerCpu::debayer10P_RGRG_BGR888(uint8_t *dst, const uint8_t *src[])\n> +{\n> +\tconst int width_in_bytes = window_.width * 5 / 4;\n> +\tconst uint8_t *prev = (const uint8_t *)src[0];\n> +\tconst uint8_t *curr = (const uint8_t *)src[1];\n> +\tconst uint8_t *next = (const uint8_t *)src[2];\n> +\n> +\tfor (int x = 0; x < width_in_bytes; x++) {\n> +\t\t/* Even pixel */\n> +\t\tRGGB_BGR888(2, 1, 1)\n> +\t\t/* Odd pixel RGGB -> GRBG*/\n> +\t\tGRBG_BGR888(1, 1, 1)\n> +\t\t/* Same thing for next 2 pixels */\n> +\t\tRGGB_BGR888(1, 1, 1)\n> +\t\tGRBG_BGR888(1, 2, 1)\n> +\t}\n> +}\n> +\n> +static bool isStandardBayerOrder(BayerFormat::Order order)\n> +{\n> +\treturn order == BayerFormat::BGGR || order == BayerFormat::GBRG ||\n> +\t       order == BayerFormat::GRBG || order == BayerFormat::RGGB;\n> +}\n> +\n> +/*\n> + * Setup the Debayer object according to the passed in parameters.\n> + * Return 0 on success, a negative errno value on failure\n> + * (unsupported parameters).\n> + */\n> +int DebayerCpu::getInputConfig(PixelFormat inputFormat, DebayerInputConfig &config)\n> +{\n> +\tBayerFormat bayerFormat =\n> +\t\tBayerFormat::fromPixelFormat(inputFormat);\n> +\n> +\tif (bayerFormat.bitDepth == 10 &&\n> +\t    bayerFormat.packing == BayerFormat::Packing::CSI2 &&\n> +\t    isStandardBayerOrder(bayerFormat.order)) {\n> +\t\tconfig.bpp = 10;\n> +\t\tconfig.patternSize.width = 4; /* 5 bytes per *4* pixels */\n> +\t\tconfig.patternSize.height = 2;\n> +\t\tconfig.outputFormats = std::vector<PixelFormat>({ formats::RGB888 });\n> +\t\treturn 0;\n> +\t}\n> +\n> +\tLOG(Debayer, Info)\n> +\t\t<< \"Unsupported input format \" << inputFormat.toString();\n> +\treturn -EINVAL;\n> +}\n> +\n> +int DebayerCpu::getOutputConfig(PixelFormat outputFormat, DebayerOutputConfig &config)\n> +{\n> +\tif (outputFormat == formats::RGB888) {\n> +\t\tconfig.bpp = 24;\n> +\t\treturn 0;\n> +\t}\n> +\n> +\tLOG(Debayer, Info)\n> +\t\t<< \"Unsupported output format \" << outputFormat.toString();\n> +\treturn -EINVAL;\n> +}\n> +\n> +/* TODO: this ignores outputFormat since there is only 1 supported outputFormat for now */\n> +int DebayerCpu::setDebayerFunctions(PixelFormat inputFormat, [[maybe_unused]] PixelFormat outputFormat)\n> +{\n> +\tBayerFormat bayerFormat =\n> +\t\tBayerFormat::fromPixelFormat(inputFormat);\n> +\n> +\tif (bayerFormat.bitDepth == 10 &&\n> +\t    bayerFormat.packing == BayerFormat::Packing::CSI2) {\n> +\t\tswitch (bayerFormat.order) {\n> +\t\tcase BayerFormat::BGGR:\n> +\t\t\tdebayer0_ = &DebayerCpu::debayer10P_BGBG_BGR888;\n> +\t\t\tdebayer1_ = &DebayerCpu::debayer10P_GRGR_BGR888;\n> +\t\t\treturn 0;\n> +\t\tcase BayerFormat::GBRG:\n> +\t\t\tdebayer0_ = &DebayerCpu::debayer10P_GBGB_BGR888;\n> +\t\t\tdebayer1_ = &DebayerCpu::debayer10P_RGRG_BGR888;\n> +\t\t\treturn 0;\n> +\t\tcase BayerFormat::GRBG:\n> +\t\t\tdebayer0_ = &DebayerCpu::debayer10P_GRGR_BGR888;\n> +\t\t\tdebayer1_ = &DebayerCpu::debayer10P_BGBG_BGR888;\n> +\t\t\treturn 0;\n> +\t\tcase BayerFormat::RGGB:\n> +\t\t\tdebayer0_ = &DebayerCpu::debayer10P_RGRG_BGR888;\n> +\t\t\tdebayer1_ = &DebayerCpu::debayer10P_GBGB_BGR888;\n> +\t\t\treturn 0;\n> +\t\tdefault:\n> +\t\t\tbreak;\n> +\t\t}\n> +\t}\n> +\n> +\tLOG(Debayer, Error) << \"Unsupported input output format combination\";\n> +\treturn -EINVAL;\n> +}\n> +\n> +int DebayerCpu::configure(const StreamConfiguration &inputCfg,\n> +\t\t\t  const std::vector<std::reference_wrapper<StreamConfiguration>> &outputCfgs)\n> +{\n> +\tif (getInputConfig(inputCfg.pixelFormat, inputConfig_) != 0)\n> +\t\treturn -EINVAL;\n> +\n> +\tif (stats_->configure(inputCfg) != 0)\n> +\t\treturn -EINVAL;\n> +\n> +\tconst Size &stats_pattern_size = stats_->patternSize();\n> +\tif (inputConfig_.patternSize.width != stats_pattern_size.width ||\n> +\t    inputConfig_.patternSize.height != stats_pattern_size.height) {\n> +\t\tLOG(Debayer, Error)\n> +\t\t\t<< \"mismatching stats and debayer pattern sizes for \"\n> +\t\t\t<< inputCfg.pixelFormat.toString();\n> +\t\treturn -EINVAL;\n> +\t}\n> +\n> +\tinputConfig_.stride = inputCfg.stride;\n> +\n> +\tif (outputCfgs.size() != 1) {\n> +\t\tLOG(Debayer, Error)\n> +\t\t\t<< \"Unsupported number of output streams: \"\n> +\t\t\t<< outputCfgs.size();\n> +\t\treturn -EINVAL;\n> +\t}\n> +\n> +\tconst StreamConfiguration &outputCfg = outputCfgs[0];\n> +\tSizeRange outSizeRange = sizes(inputCfg.pixelFormat, inputCfg.size);\n> +\tstd::tie(outputConfig_.stride, outputConfig_.frameSize) =\n> +\t\tstrideAndFrameSize(outputCfg.pixelFormat, outputCfg.size);\n> +\n> +\tif (!outSizeRange.contains(outputCfg.size) || outputConfig_.stride != outputCfg.stride) {\n> +\t\tLOG(Debayer, Error)\n> +\t\t\t<< \"Invalid output size/stride: \"\n> +\t\t\t<< \"\\n  \" << outputCfg.size << \" (\" << outSizeRange << \")\"\n> +\t\t\t<< \"\\n  \" << outputCfg.stride << \" (\" << outputConfig_.stride << \")\";\n> +\t\treturn -EINVAL;\n> +\t}\n> +\n> +\tif (setDebayerFunctions(inputCfg.pixelFormat, outputCfg.pixelFormat) != 0)\n> +\t\treturn -EINVAL;\n> +\n> +\twindow_.x = ((inputCfg.size.width - outputCfg.size.width) / 2) &\n> +\t\t    ~(inputConfig_.patternSize.width - 1);\n> +\twindow_.y = ((inputCfg.size.height - outputCfg.size.height) / 2) &\n> +\t\t    ~(inputConfig_.patternSize.height - 1);\n> +\twindow_.width = outputCfg.size.width;\n> +\twindow_.height = outputCfg.size.height;\n> +\n> +\t/* Don't pass x,y since process() already adjusts src before passing it */\n> +\tstats_->setWindow(Rectangle(window_.size()));\n> +\n> +\t/* pad with patternSize.Width on both left and right side */\n> +\tlineBufferPadding_ = inputConfig_.patternSize.width * inputConfig_.bpp / 8;\n> +\tlineBufferLength_ = window_.width * inputConfig_.bpp / 8 +\n> +\t\t\t    2 * lineBufferPadding_;\n> +\tfor (unsigned int i = 0;\n> +\t     i < (inputConfig_.patternSize.height + 1) && enableInputMemcpy_;\n> +\t     i++) {\n> +\t\tfree(lineBuffers_[i]);\n> +\t\tlineBuffers_[i] = (uint8_t *)malloc(lineBufferLength_);\n> +\t\tif (!lineBuffers_[i])\n> +\t\t\treturn -ENOMEM;\n> +\t}\n> +\n> +\tmeasuredFrames_ = 0;\n> +\tframeProcessTime_ = 0;\n> +\n> +\treturn 0;\n> +}\n> +\n> +/*\n> + * Get width and height at which the bayer-pattern repeats.\n> + * Return pattern-size or an empty Size for an unsupported inputFormat.\n> + */\n> +Size DebayerCpu::patternSize(PixelFormat inputFormat)\n> +{\n> +\tDebayerCpu::DebayerInputConfig config;\n> +\n> +\tif (getInputConfig(inputFormat, config) != 0)\n> +\t\treturn {};\n> +\n> +\treturn config.patternSize;\n> +}\n> +\n> +std::vector<PixelFormat> DebayerCpu::formats(PixelFormat inputFormat)\n> +{\n> +\tDebayerCpu::DebayerInputConfig config;\n> +\n> +\tif (getInputConfig(inputFormat, config) != 0)\n> +\t\treturn std::vector<PixelFormat>();\n> +\n> +\treturn config.outputFormats;\n> +}\n> +\n> +std::tuple<unsigned int, unsigned int>\n> +DebayerCpu::strideAndFrameSize(const PixelFormat &outputFormat, const Size &size)\n> +{\n> +\tDebayerCpu::DebayerOutputConfig config;\n> +\n> +\tif (getOutputConfig(outputFormat, config) != 0)\n> +\t\treturn std::make_tuple(0, 0);\n> +\n> +\t/* round up to multiple of 8 for 64 bits alignment */\n> +\tunsigned int stride = (size.width * config.bpp / 8 + 7) & ~7;\n> +\n> +\treturn std::make_tuple(stride, stride * size.height);\n> +}\n> +\n> +void DebayerCpu::setupInputMemcpy(const uint8_t *linePointers[])\n> +{\n> +\tconst unsigned int patternHeight = inputConfig_.patternSize.height;\n> +\n> +\tif (!enableInputMemcpy_)\n> +\t\treturn;\n> +\n> +\tfor (unsigned int i = 0; i < patternHeight; i++) {\n> +\t\tmemcpy(lineBuffers_[i], linePointers[i + 1] - lineBufferPadding_,\n> +\t\t       lineBufferLength_);\n> +\t\tlinePointers[i + 1] = lineBuffers_[i] + lineBufferPadding_;\n> +\t}\n> +\n> +\t/* Point lineBufferIndex_ to first unused lineBuffer */\n> +\tlineBufferIndex_ = patternHeight;\n> +}\n> +\n> +void DebayerCpu::shiftLinePointers(const uint8_t *linePointers[], const uint8_t *src)\n> +{\n> +\tconst unsigned int patternHeight = inputConfig_.patternSize.height;\n> +\n> +\tfor (unsigned int i = 0; i < patternHeight; i++)\n> +\t\tlinePointers[i] = linePointers[i + 1];\n> +\n> +\tlinePointers[patternHeight] = src +\n> +\t\t\t\t      (patternHeight / 2) * (int)inputConfig_.stride;\n> +}\n> +\n> +void DebayerCpu::memcpyNextLine(const uint8_t *linePointers[])\n> +{\n> +\tconst unsigned int patternHeight = inputConfig_.patternSize.height;\n> +\n> +\tif (!enableInputMemcpy_)\n> +\t\treturn;\n> +\n> +\tmemcpy(lineBuffers_[lineBufferIndex_], linePointers[patternHeight] - lineBufferPadding_,\n> +\t       lineBufferLength_);\n> +\tlinePointers[patternHeight] = lineBuffers_[lineBufferIndex_] + lineBufferPadding_;\n> +\n> +\tlineBufferIndex_ = (lineBufferIndex_ + 1) % (patternHeight + 1);\n> +}\n> +\n> +void DebayerCpu::process2(const uint8_t *src, uint8_t *dst)\n> +{\n> +\tunsigned int y_end = window_.y + window_.height;\n> +\t/* Holds [0] previous- [1] current- [2] next-line */\n> +\tconst uint8_t *linePointers[3];\n> +\n> +\t/* Adjust src to top left corner of the window */\n> +\tsrc += window_.y * inputConfig_.stride + window_.x * inputConfig_.bpp / 8;\n> +\n> +\t/* [x] becomes [x - 1] after initial shiftLinePointers() call */\n> +\tif (window_.y) {\n> +\t\tlinePointers[1] = src - inputConfig_.stride; /* previous-line */\n> +\t\tlinePointers[2] = src;\n> +\t} else {\n> +\t\t/* window_.y == 0, use the next line as prev line */\n> +\t\tlinePointers[1] = src + inputConfig_.stride;\n> +\t\tlinePointers[2] = src;\n> +\t\t/* Last 2 lines also need special handling */\n> +\t\ty_end -= 2;\n> +\t}\n> +\n> +\tsetupInputMemcpy(linePointers);\n> +\n> +\tfor (unsigned int y = window_.y; y < y_end; y += 2) {\n> +\t\tshiftLinePointers(linePointers, src);\n> +\t\tmemcpyNextLine(linePointers);\n> +\t\tstats_->processLine0(y, linePointers);\n> +\t\t(this->*debayer0_)(dst, linePointers);\n> +\t\tsrc += inputConfig_.stride;\n> +\t\tdst += outputConfig_.stride;\n> +\n> +\t\tshiftLinePointers(linePointers, src);\n> +\t\tmemcpyNextLine(linePointers);\n> +\t\t(this->*debayer1_)(dst, linePointers);\n> +\t\tsrc += inputConfig_.stride;\n> +\t\tdst += outputConfig_.stride;\n> +\t}\n> +\n> +\tif (window_.y == 0) {\n> +\t\tshiftLinePointers(linePointers, src);\n> +\t\tmemcpyNextLine(linePointers);\n> +\t\tstats_->processLine0(y_end, linePointers);\n> +\t\t(this->*debayer0_)(dst, linePointers);\n> +\t\tsrc += inputConfig_.stride;\n> +\t\tdst += outputConfig_.stride;\n> +\n> +\t\tshiftLinePointers(linePointers, src);\n> +\t\t/* next line may point outside of src, use prev. */\n> +\t\tlinePointers[2] = linePointers[0];\n> +\t\t(this->*debayer1_)(dst, linePointers);\n> +\t\tsrc += inputConfig_.stride;\n> +\t\tdst += outputConfig_.stride;\n> +\t}\n> +}\n> +\n> +void DebayerCpu::process4(const uint8_t *src, uint8_t *dst)\n> +{\n> +\tconst unsigned int y_end = window_.y + window_.height;\n> +\t/*\n> +\t * This holds pointers to [0] 2-lines-up [1] 1-line-up [2] current-line\n> +\t * [3] 1-line-down [4] 2-lines-down.\n> +\t */\n> +\tconst uint8_t *linePointers[5];\n> +\n> +\t/* Adjust src to top left corner of the window */\n> +\tsrc += window_.y * inputConfig_.stride + window_.x * inputConfig_.bpp / 8;\n> +\n> +\t/* [x] becomes [x - 1] after initial shiftLinePointers() call */\n> +\tlinePointers[1] = src - 2 * inputConfig_.stride;\n> +\tlinePointers[2] = src - inputConfig_.stride;\n> +\tlinePointers[3] = src;\n> +\tlinePointers[4] = src + inputConfig_.stride;\n> +\n> +\tsetupInputMemcpy(linePointers);\n> +\n> +\tfor (unsigned int y = window_.y; y < y_end; y += 4) {\n> +\t\tshiftLinePointers(linePointers, src);\n> +\t\tmemcpyNextLine(linePointers);\n> +\t\tstats_->processLine0(y, linePointers);\n> +\t\t(this->*debayer0_)(dst, linePointers);\n> +\t\tsrc += inputConfig_.stride;\n> +\t\tdst += outputConfig_.stride;\n> +\n> +\t\tshiftLinePointers(linePointers, src);\n> +\t\tmemcpyNextLine(linePointers);\n> +\t\t(this->*debayer1_)(dst, linePointers);\n> +\t\tsrc += inputConfig_.stride;\n> +\t\tdst += outputConfig_.stride;\n> +\n> +\t\tshiftLinePointers(linePointers, src);\n> +\t\tmemcpyNextLine(linePointers);\n> +\t\tstats_->processLine2(y, linePointers);\n> +\t\t(this->*debayer2_)(dst, linePointers);\n> +\t\tsrc += inputConfig_.stride;\n> +\t\tdst += outputConfig_.stride;\n> +\n> +\t\tshiftLinePointers(linePointers, src);\n> +\t\tmemcpyNextLine(linePointers);\n> +\t\t(this->*debayer3_)(dst, linePointers);\n> +\t\tsrc += inputConfig_.stride;\n> +\t\tdst += outputConfig_.stride;\n> +\t}\n> +}\n> +\n> +static inline int64_t timeDiff(timespec &after, timespec &before)\n> +{\n> +\treturn (after.tv_sec - before.tv_sec) * 1000000000LL +\n> +\t       (int64_t)after.tv_nsec - (int64_t)before.tv_nsec;\n> +}\n> +\n> +void DebayerCpu::process(FrameBuffer *input, FrameBuffer *output, DebayerParams params)\n> +{\n> +\ttimespec frameStartTime;\n> +\n> +\tif (measuredFrames_ < DebayerCpu::kLastFrameToMeasure) {\n> +\t\tframeStartTime = {};\n> +\t\tclock_gettime(CLOCK_MONOTONIC_RAW, &frameStartTime);\n> +\t}\n> +\n> +\t/* Apply DebayerParams */\n> +\tif (params.gamma != gamma_correction_) {\n> +\t\tfor (unsigned int i = 0; i < kGammaLookupSize; i++)\n> +\t\t\tgamma_[i] = UINT8_MAX * powf(i / (kGammaLookupSize - 1.0), params.gamma);\n> +\n> +\t\tgamma_correction_ = params.gamma;\n> +\t}\n> +\n> +\tfor (unsigned int i = 0; i < kRGBLookupSize; i++) {\n> +\t\tconstexpr unsigned int div =\n> +\t\t\tkRGBLookupSize * DebayerParams::kGain10 / kGammaLookupSize;\n> +\t\tunsigned int idx;\n> +\n> +\t\t/* Apply gamma after gain! */\n> +\t\tidx = std::min({ i * params.gainR / div, (kGammaLookupSize - 1) });\n> +\t\tred_[i] = gamma_[idx];\n> +\n> +\t\tidx = std::min({ i * params.gainG / div, (kGammaLookupSize - 1) });\n> +\t\tgreen_[i] = gamma_[idx];\n> +\n> +\t\tidx = std::min({ i * params.gainB / div, (kGammaLookupSize - 1) });\n> +\t\tblue_[i] = gamma_[idx];\n> +\t}\n> +\n> +\t/* Copy metadata from the input buffer */\n> +\tFrameMetadata &metadata = output->_d()->metadata();\n> +\tmetadata.status = input->metadata().status;\n> +\tmetadata.sequence = input->metadata().sequence;\n> +\tmetadata.timestamp = input->metadata().timestamp;\n> +\n> +\tMappedFrameBuffer in(input, MappedFrameBuffer::MapFlag::Read);\n> +\tMappedFrameBuffer out(output, MappedFrameBuffer::MapFlag::Write);\n> +\tif (!in.isValid() || !out.isValid()) {\n> +\t\tLOG(Debayer, Error) << \"mmap-ing buffer(s) failed\";\n> +\t\tmetadata.status = FrameMetadata::FrameError;\n> +\t\treturn;\n> +\t}\n> +\n> +\tstats_->startFrame();\n> +\n> +\tif (inputConfig_.patternSize.height == 2)\n> +\t\tprocess2(in.planes()[0].data(), out.planes()[0].data());\n> +\telse\n> +\t\tprocess4(in.planes()[0].data(), out.planes()[0].data());\n> +\n> +\tmetadata.planes()[0].bytesused = out.planes()[0].size();\n> +\n> +\t/* Measure before emitting signals */\n> +\tif (measuredFrames_ < DebayerCpu::kLastFrameToMeasure &&\n> +\t    ++measuredFrames_ > DebayerCpu::kFramesToSkip) {\n> +\t\ttimespec frameEndTime = {};\n> +\t\tclock_gettime(CLOCK_MONOTONIC_RAW, &frameEndTime);\n> +\t\tframeProcessTime_ += timeDiff(frameEndTime, frameStartTime);\n> +\t\tif (measuredFrames_ == DebayerCpu::kLastFrameToMeasure) {\n> +\t\t\tconst unsigned int measuredFrames = DebayerCpu::kLastFrameToMeasure -\n> +\t\t\t\t\t\t\t    DebayerCpu::kFramesToSkip;\n> +\t\t\tLOG(Debayer, Info)\n> +\t\t\t\t<< \"Processed \" << measuredFrames\n> +\t\t\t\t<< \" frames in \" << frameProcessTime_ / 1000 << \"us, \"\n> +\t\t\t\t<< frameProcessTime_ / (1000 * measuredFrames)\n> +\t\t\t\t<< \" us/frame\";\n> +\t\t}\n> +\t}\n> +\n> +\tstats_->finishFrame();\n> +\toutputBufferReady.emit(output);\n> +\tinputBufferReady.emit(input);\n> +}\n> +\n> +SizeRange DebayerCpu::sizes(PixelFormat inputFormat, const Size &inputSize)\n> +{\n> +\tSize pattern_size = patternSize(inputFormat);\n> +\tunsigned int border_height = pattern_size.height;\n> +\n> +\tif (pattern_size.isNull())\n> +\t\treturn {};\n> +\n> +\t/* No need for top/bottom border with a pattern height of 2 */\n> +\tif (pattern_size.height == 2)\n> +\t\tborder_height = 0;\n> +\n> +\t/*\n> +\t * For debayer interpolation a border is kept around the entire image\n> +\t * and the minimum output size is pattern-height x pattern-width.\n> +\t */\n\nWhat if the output size is larger?  The border is quite impractical because it\nforces (or not?) the output size to be non-standard, assuming the camera\nprovides common resolutions.  Consider e.g. full-HD camera resolution not being\nable to be output 1:1 to a full-HD display.\n \nI don't think this should block the current patch series, but it should be\nconsidered for future enhancements.\n\n> \n> +\tif (inputSize.width < (3 * pattern_size.width) ||\n> +\t    inputSize.height < (2 * border_height + pattern_size.height)) {\n> +\t\tLOG(Debayer, Warning)\n> +\t\t\t<< \"Input format size too small: \" << inputSize.toString();\n> +\t\treturn {};\n> +\t}\n> +\n> +\treturn SizeRange(Size(pattern_size.width, pattern_size.height),\n> +\t\t\t Size((inputSize.width - 2 * pattern_size.width) & ~(pattern_size.width - 1),\n> +\t\t\t      (inputSize.height - 2 * border_height) & ~(pattern_size.height - 1)),\n> +\t\t\t pattern_size.width, pattern_size.height);\n> +}\n> +\n> +} /* namespace libcamera */\n> diff --git a/src/libcamera/software_isp/debayer_cpu.h b/src/libcamera/software_isp/debayer_cpu.h\n> new file mode 100644\n> index 00000000..e0c4b9a8\n> --- /dev/null\n> +++ b/src/libcamera/software_isp/debayer_cpu.h\n> @@ -0,0 +1,143 @@\n> +/* SPDX-License-Identifier: LGPL-2.1-or-later */\n> +/*\n> + * Copyright (C) 2023, Linaro Ltd\n> + * Copyright (C) 2023, Red Hat Inc.\n> + *\n> + * Authors:\n> + * Hans de Goede <hdegoede@redhat.com>\n> + *\n> + * debayer_cpu.h - CPU based debayering header\n> + */\n> +\n> +#pragma once\n> +\n> +#include <memory>\n> +#include <stdint.h>\n> +#include <vector>\n> +\n> +#include <libcamera/base/object.h>\n> +\n> +#include \"debayer.h\"\n> +#include \"swstats_cpu.h\"\n> +\n> +namespace libcamera {\n> +\n> +class DebayerCpu : public Debayer, public Object\n> +{\n> +public:\n> +\tDebayerCpu(std::unique_ptr<SwStatsCpu> stats);\n> +\t~DebayerCpu();\n> +\n> +\tint configure(const StreamConfiguration &inputCfg,\n> +\t\t      const std::vector<std::reference_wrapper<StreamConfiguration>> &outputCfgs);\n> +\tSize patternSize(PixelFormat inputFormat);\n> +\tstd::vector<PixelFormat> formats(PixelFormat input);\n> +\tstd::tuple<unsigned int, unsigned int>\n> +\tstrideAndFrameSize(const PixelFormat &outputFormat, const Size &size);\n> +\tvoid process(FrameBuffer *input, FrameBuffer *output, DebayerParams params);\n> +\tSizeRange sizes(PixelFormat inputFormat, const Size &inputSize);\n> +\n> +\t/**\n> +\t * \\brief Get the file descriptor for the statistics.\n> +\t *\n> +\t * \\return the file descriptor pointing to the statistics.\n> +\t */\n> +\tconst SharedFD &getStatsFD() { return stats_->getStatsFD(); }\n> +\n> +\t/**\n> +\t * \\brief Get the output frame size.\n> +\t *\n> +\t * \\return The output frame size.\n> +\t */\n> +\tunsigned int frameSize() { return outputConfig_.frameSize; }\n> +\n> +private:\n> +\t/**\n> +\t * \\brief Called to debayer 1 line of Bayer input data to output format\n> +\t * \\param[out] dst Pointer to the start of the output line to write\n> +\t * \\param[in] src The input data\n> +\t *\n> +\t * Input data is an array of (patternSize_.height + 1) src\n> +\t * pointers each pointing to a line in the Bayer source. The middle\n> +\t * element of the array will point to the actual line being processed.\n> +\t * Earlier element(s) will point to the previous line(s) and later\n> +\t * element(s) to the next line(s).\n> +\t *\n> +\t * These functions take an array of src pointers, rather then\n> +\t * a single src pointer + a stride for the source, so that when the src\n> +\t * is slow uncached memory it can be copied to faster memory before\n> +\t * debayering. Debayering a standard 2x2 Bayer pattern requires access\n> +\t * to the previous and next src lines for interpolating the missing\n> +\t * colors. To allow copying the src lines only once 3 buffers each\n\nI'd avoid using the term \"buffer\" here to avoid any confusion with input and\noutput buffers.\n\n> +\t * holding a single line are used, re-using the oldest buffer for\n> +\t * the next line and the pointers are swizzled so that:\n> +\t * src[0] = previous-line, src[1] = currrent-line, src[2] = next-line.\n> +\t * This way the 3 pointers passed to the debayer functions form\n> +\t * a sliding window over the src avoiding the need to copy each\n> +\t * line more then once.\n\nthen -> than\n\n> +\t *\n> +\t * Similarly for bayer patterns which repeat every 4 lines, 5 src\n> +\t * pointers are passed holding: src[0] = 2-lines-up, src[1] = 1-line-up\n> +\t * src[2] = current-line, src[3] = 1-line-down, src[4] = 2-lines-down.\n> +\t */\n> +\ttypedef void (DebayerCpu::*debayerFn)(uint8_t *dst, const uint8_t *src[]);\n\ntypedef -> using ?\n\n> +\n> +\t/* CSI-2 packed 10-bit raw bayer format (all the 4 orders) */\n> +\tvoid debayer10P_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]);\n> +\tvoid debayer10P_GRGR_BGR888(uint8_t *dst, const uint8_t *src[]);\n> +\tvoid debayer10P_GBGB_BGR888(uint8_t *dst, const uint8_t *src[]);\n> +\tvoid debayer10P_RGRG_BGR888(uint8_t *dst, const uint8_t *src[]);\n> +\n> +\tstruct DebayerInputConfig {\n> +\t\tSize patternSize;\n> +\t\tunsigned int bpp; /* Memory used per pixel, not precision */\n> +\t\tunsigned int stride;\n> +\t\tstd::vector<PixelFormat> outputFormats;\n> +\t};\n> +\n> +\tstruct DebayerOutputConfig {\n> +\t\tunsigned int bpp; /* Memory used per pixel, not precision */\n> +\t\tunsigned int stride;\n> +\t\tunsigned int frameSize;\n> +\t};\n> +\n> +\tint getInputConfig(PixelFormat inputFormat, DebayerInputConfig &config);\n> +\tint getOutputConfig(PixelFormat outputFormat, DebayerOutputConfig &config);\n> +\tint setDebayerFunctions(PixelFormat inputFormat, PixelFormat outputFormat);\n> +\tvoid setupInputMemcpy(const uint8_t *linePointers[]);\n> +\tvoid shiftLinePointers(const uint8_t *linePointers[], const uint8_t *src);\n> +\tvoid memcpyNextLine(const uint8_t *linePointers[]);\n> +\tvoid process2(const uint8_t *src, uint8_t *dst);\n> +\tvoid process4(const uint8_t *src, uint8_t *dst);\n> +\n> +\tstatic constexpr unsigned int kGammaLookupSize = 1024;\n> +\tstatic constexpr unsigned int kRGBLookupSize = 256;\n> +\t/* Max. supported Bayer pattern height is 4, debayering this requires 5 lines */\n> +\tstatic constexpr unsigned int kMaxLineBuffers = 5;\n> +\n> +\tstd::array<uint8_t, kGammaLookupSize> gamma_;\n> +\tstd::array<uint8_t, kRGBLookupSize> red_;\n> +\tstd::array<uint8_t, kRGBLookupSize> green_;\n> +\tstd::array<uint8_t, kRGBLookupSize> blue_;\n> +\tdebayerFn debayer0_;\n> +\tdebayerFn debayer1_;\n> +\tdebayerFn debayer2_;\n> +\tdebayerFn debayer3_;\n> +\tRectangle window_;\n> +\tDebayerInputConfig inputConfig_;\n> +\tDebayerOutputConfig outputConfig_;\n> +\tstd::unique_ptr<SwStatsCpu> stats_;\n> +\tuint8_t *lineBuffers_[kMaxLineBuffers];\n> +\tunsigned int lineBufferLength_;\n> +\tunsigned int lineBufferPadding_;\n> +\tunsigned int lineBufferIndex_;\n> +\tbool enableInputMemcpy_;\n> +\tfloat gamma_correction_;\n> +\tunsigned int measuredFrames_;\n> +\tint64_t frameProcessTime_;\n> +\t/* Skip 30 frames for things to stabilize then measure 30 frames */\n> +\tstatic constexpr unsigned int kFramesToSkip = 30;\n> +\tstatic constexpr unsigned int kLastFrameToMeasure = 60;\n> +};\n> +\n> +} /* namespace libcamera */\n> diff --git a/src/libcamera/software_isp/meson.build b/src/libcamera/software_isp/meson.build\n> index 92fc90f3..dd5e70c1 100644\n> --- a/src/libcamera/software_isp/meson.build\n> +++ b/src/libcamera/software_isp/meson.build\n> @@ -2,5 +2,6 @@\n>  \n>  libcamera_sources += files([\n>      'debayer.cpp',\n> +    'debayer_cpu.cpp',\n>      'swstats_cpu.cpp',\n>  ])","headers":{"Return-Path":"<libcamera-devel-bounces@lists.libcamera.org>","X-Original-To":"parsemail@patchwork.libcamera.org","Delivered-To":"parsemail@patchwork.libcamera.org","Received":["from lancelot.ideasonboard.com (lancelot.ideasonboard.com\n\t[92.243.16.209])\n\tby patchwork.libcamera.org (Postfix) with ESMTPS id D4C3EC3257\n\tfor <parsemail@patchwork.libcamera.org>;\n\tThu, 15 Feb 2024 15:34:03 +0000 (UTC)","from lancelot.ideasonboard.com (localhost [IPv6:::1])\n\tby lancelot.ideasonboard.com (Postfix) with ESMTP id 2115C62805;\n\tThu, 15 Feb 2024 16:34:03 +0100 (CET)","from us-smtp-delivery-124.mimecast.com\n\t(us-smtp-delivery-124.mimecast.com [170.10.133.124])\n\tby lancelot.ideasonboard.com (Postfix) with ESMTPS id 49A3261CB0\n\tfor <libcamera-devel@lists.libcamera.org>;\n\tThu, 15 Feb 2024 16:34:01 +0100 (CET)","from mail-wm1-f72.google.com (mail-wm1-f72.google.com\n\t[209.85.128.72]) by relay.mimecast.com with ESMTP with STARTTLS\n\t(version=TLSv1.3, cipher=TLS_AES_256_GCM_SHA384) id\n\tus-mta-652-XlmDgN-fNPWqjSmeyeQixg-1; Thu, 15 Feb 2024 10:33:58 -0500","by mail-wm1-f72.google.com with SMTP id\n\t5b1f17b1804b1-4102b934ba0so5913445e9.2\n\tfor <libcamera-devel@lists.libcamera.org>;\n\tThu, 15 Feb 2024 07:33:58 -0800 (PST)","from nuthatch (nat-pool-brq-t.redhat.com. [213.175.37.10])\n\tby smtp.gmail.com with ESMTPSA id\n\ty5-20020a7bcd85000000b00410ab50f70fsm5276933wmj.15.2024.02.15.07.33.56\n\t(version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256);\n\tThu, 15 Feb 2024 07:33:56 -0800 (PST)"],"Authentication-Results":"lancelot.ideasonboard.com;\n\tdkim=fail reason=\"signature verification failed\" (1024-bit key;\n\tunprotected) header.d=redhat.com header.i=@redhat.com\n\theader.b=\"ErOdiMw5\"; dkim-atps=neutral","DKIM-Signature":"v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com;\n\ts=mimecast20190719; t=1708011240;\n\th=from:from:reply-to:subject:subject:date:date:message-id:message-id:\n\tto:to:cc:cc:mime-version:mime-version:content-type:content-type:\n\tin-reply-to:in-reply-to:references:references;\n\tbh=haNwo9f+r2I+F51r1cTYr2GVnwUgdLn/ammKFiSfLrc=;\n\tb=ErOdiMw5TzUYsGTEXgyej2yJjh5ahr/EwcU+qBVk5ymgPgwHM9XMMMCaQWqY7SMgJ9PzoD\n\t/du62kp8ksx50eDH8ze2AT3GWU93Vyy1INRZgNsLt9DMfGXAeZxGER4g0VcsRO6VHtWhW+\n\tndpuWt2pqyqzXjncZUyjcq5P665wCQk=","X-MC-Unique":"XlmDgN-fNPWqjSmeyeQixg-1","X-Google-DKIM-Signature":"v=1; a=rsa-sha256; c=relaxed/relaxed;\n\td=1e100.net; s=20230601; t=1708011237; x=1708616037;\n\th=mime-version:user-agent:message-id:date:references:in-reply-to\n\t:subject:cc:to:from:x-gm-message-state:from:to:cc:subject:date\n\t:message-id:reply-to;\n\tbh=haNwo9f+r2I+F51r1cTYr2GVnwUgdLn/ammKFiSfLrc=;\n\tb=uBSH9VPbNROYuQaJAnZjSbdoWYeFA5C70JO0Q4PDzwGDx3oN+CU4q8ydjwnwF/BWI0\n\tAjj+/VQiUzrLClD7PbXQs8b5+rzYSG+/dDFLRwPNF9dPFMgav1jV2sAWYbmEEq8nmT01\n\tjNOh6KsMAxD+AQNeDw2EWg5Hh+rRA15UEV4hV9MlJyryIvtD1n/YkbpjJIvc6aIxrKv0\n\tV3ERbClUlb7VVriWh9B3V2lU2mwGlDAyJnDGPMYDEKPgx0zkw0tzjSac00HAxhWVGNGv\n\tLz6lZ+7FiAEiz3TfT88ftySKtVl072GYIcGHiaNToj6UN9msdkO588E8rcwGZolLlY5M\n\tD5bQ==","X-Gm-Message-State":"AOJu0YyGh0eAFLt6R07Ti8uIki5/5iH0dbmz5beLB/GjBYDmf4+Jhizn\n\tUEfLjufjgw02qPeXjU5fgT8D3kffuC+8ocHeNwbNI5clSEOHYs8wjXNbEJZR01DN5FhwXJt6bwp\n\tsfjGEvKdCsOyH6NOwGw4lBJvjoiznqMKzGeIg9NPXi5DsUWSoo0GIR+oPNL3/RDp+QZKwUJo=","X-Received":["by 2002:a05:600c:3554:b0:412:a7b:6959 with SMTP id\n\ti20-20020a05600c355400b004120a7b6959mr1481826wmq.7.1708011237436; \n\tThu, 15 Feb 2024 07:33:57 -0800 (PST)","by 2002:a05:600c:3554:b0:412:a7b:6959 with SMTP id\n\ti20-20020a05600c355400b004120a7b6959mr1481805wmq.7.1708011236897; \n\tThu, 15 Feb 2024 07:33:56 -0800 (PST)"],"X-Google-Smtp-Source":"AGHT+IG3TpN42oIs6bNP8x+IHb7jJ1ms2QuQ8zYXcbA1GU+XLoshT7E3MbRRiPfx+oTgUKkNze6FKg==","From":"Milan Zamazal <mzamazal@redhat.com>","To":"Hans de Goede <hdegoede@redhat.com>","Subject":"Re: [PATCH v3 08/16] libcamera: software_isp: Add DebayerCpu class","In-Reply-To":"<20240214170122.60754-9-hdegoede@redhat.com> (Hans de Goede's\n\tmessage of \"Wed, 14 Feb 2024 18:01:12 +0100\")","References":"<20240214170122.60754-1-hdegoede@redhat.com>\n\t<20240214170122.60754-9-hdegoede@redhat.com>","Date":"Thu, 15 Feb 2024 16:33:55 +0100","Message-ID":"<87bk8hd9ng.fsf@redhat.com>","User-Agent":"Gnus/5.13 (Gnus v5.13)","MIME-Version":"1.0","X-Mimecast-Spam-Score":"0","X-Mimecast-Originator":"redhat.com","Content-Type":"text/plain","X-BeenThere":"libcamera-devel@lists.libcamera.org","X-Mailman-Version":"2.1.29","Precedence":"list","List-Id":"<libcamera-devel.lists.libcamera.org>","List-Unsubscribe":"<https://lists.libcamera.org/options/libcamera-devel>,\n\t<mailto:libcamera-devel-request@lists.libcamera.org?subject=unsubscribe>","List-Archive":"<https://lists.libcamera.org/pipermail/libcamera-devel/>","List-Post":"<mailto:libcamera-devel@lists.libcamera.org>","List-Help":"<mailto:libcamera-devel-request@lists.libcamera.org?subject=help>","List-Subscribe":"<https://lists.libcamera.org/listinfo/libcamera-devel>,\n\t<mailto:libcamera-devel-request@lists.libcamera.org?subject=subscribe>","Cc":"Maxime Ripard <mripard@redhat.com>, libcamera-devel@lists.libcamera.org, \n\tPavel Machek <pavel@ucw.cz>,\n\tBryan O'Donoghue <bryan.odonoghue@linaro.org>, \n\tDennis Bonke <admin@dennisbonke.com>","Errors-To":"libcamera-devel-bounces@lists.libcamera.org","Sender":"\"libcamera-devel\" <libcamera-devel-bounces@lists.libcamera.org>"}},{"id":28695,"web_url":"https://patchwork.libcamera.org/comment/28695/","msgid":"<659d6c77-e8f3-4d8a-abeb-95c791c42662@ideasonboard.com>","date":"2024-02-19T17:09:08","subject":"Re: [PATCH v3 08/16] libcamera: software_isp: Add DebayerCpu class","submitter":{"id":184,"url":"https://patchwork.libcamera.org/api/people/184/","name":"Stefan Klug","email":"stefan.klug@ideasonboard.com"},"content":"Hi Hans,\n\nthanks for your work on the SoftISP.\n\nAm 14.02.24 um 18:01 schrieb Hans de Goede:\n> Add CPU based debayering implementation. This initial implementation\n> only supports debayering packed 10 bits per pixel bayer data in\n> the 4 standard bayer orders.\n> \n> Doxygen documentation by Dennis Bonke.\n> \n> Tested-by: Bryan O'Donoghue <bryan.odonoghue@linaro.org> # sc8280xp Lenovo x13s\n> Tested-by: Pavel Machek <pavel@ucw.cz>\n> Reviewed-by: Pavel Machek <pavel@ucw.cz>\n> Co-developed-by: Dennis Bonke <admin@dennisbonke.com>\n> Signed-off-by: Dennis Bonke <admin@dennisbonke.com>\n> Co-developed-by: Andrey Konovalov <andrey.konovalov@linaro.org>\n> Signed-off-by: Andrey Konovalov <andrey.konovalov@linaro.org>\n> Co-developed-by: Pavel Machek <pavel@ucw.cz>\n> Signed-off-by: Pavel Machek <pavel@ucw.cz>\n> Signed-off-by: Hans de Goede <hdegoede@redhat.com>\n> ---\n> Changes in v3:\n> - Move debayer_cpu.h to src/libcamera/software_isp/\n> - Move documentation to .cpp file\n> - Document how/why an array of src pointers is passed to\n>    the debayer functions\n> ---\n>   src/libcamera/software_isp/debayer_cpu.cpp | 619 +++++++++++++++++++++\n>   src/libcamera/software_isp/debayer_cpu.h   | 143 +++++\n>   src/libcamera/software_isp/meson.build     |   1 +\n>   3 files changed, 763 insertions(+)\n>   create mode 100644 src/libcamera/software_isp/debayer_cpu.cpp\n>   create mode 100644 src/libcamera/software_isp/debayer_cpu.h\n> \n> diff --git a/src/libcamera/software_isp/debayer_cpu.cpp b/src/libcamera/software_isp/debayer_cpu.cpp\n> new file mode 100644\n> index 00000000..53e90776\n> --- /dev/null\n> +++ b/src/libcamera/software_isp/debayer_cpu.cpp\n> @@ -0,0 +1,619 @@\n> +/* SPDX-License-Identifier: LGPL-2.1-or-later */\n> +/*\n> + * Copyright (C) 2023, Linaro Ltd\n> + * Copyright (C) 2023, Red Hat Inc.\n> + *\n> + * Authors:\n> + * Hans de Goede <hdegoede@redhat.com>\n> + *\n> + * debayer_cpu.cpp - CPU based debayering class\n> + */\n> +\n> +#include \"debayer_cpu.h\"\n> +\n> +#include <math.h>\n> +#include <stdlib.h>\n> +#include <time.h>\n> +\n> +#include <libcamera/formats.h>\n> +\n> +#include \"libcamera/internal/bayer_format.h\"\n> +#include \"libcamera/internal/framebuffer.h\"\n> +#include \"libcamera/internal/mapped_framebuffer.h\"\n> +\n> +namespace libcamera {\n> +\n> +/**\n> + * \\class DebayerCpu\n> + * \\brief Class for debayering on the CPU\n> + *\n> + * Implementation for CPU based debayering\n> + */\n> +\n> +/**\n> + * \\brief Constructs a DebayerCpu object.\n> + * \\param[in] stats Pointer to the stats object to use.\n> + */\n> +DebayerCpu::DebayerCpu(std::unique_ptr<SwStatsCpu> stats)\n\nI tried to use the ISP without statistics/regulation altogether and just \nset stats to a nullptr. This fails in a few places. IMHO it would \nimprove flexibility & reusability to allow that.\nAttched is a patch with the modifications as I needed them anyways. Feel \nfree to include them.\n\nCheers Stefan\n\n> +\t: stats_(std::move(stats)), gamma_correction_(1.0)\n> +{\n> +#ifdef __x86_64__\n> +\tenableInputMemcpy_ = false;\n> +#else\n> +\tenableInputMemcpy_ = true;\n> +#endif\n> +\t/* Initialize gamma to 1.0 curve */\n> +\tfor (unsigned int i = 0; i < kGammaLookupSize; i++)\n> +\t\tgamma_[i] = i / 4;\n> +\n> +\tfor (unsigned int i = 0; i < kMaxLineBuffers; i++)\n> +\t\tlineBuffers_[i] = nullptr;\n> +}\n> +\n> +DebayerCpu::~DebayerCpu()\n> +{\n> +\tfor (unsigned int i = 0; i < kMaxLineBuffers; i++)\n> +\t\tfree(lineBuffers_[i]);\n> +}\n> +\n> +// RGR\n> +// GBG\n> +// RGR\n> +#define BGGR_BGR888(p, n, div)                                                                \\\n> +\t*dst++ = blue_[curr[x] / (div)];                                                      \\\n> +\t*dst++ = green_[(prev[x] + curr[x - p] + curr[x + n] + next[x]) / (4 * (div))];       \\\n> +\t*dst++ = red_[(prev[x - p] + prev[x + n] + next[x - p] + next[x + n]) / (4 * (div))]; \\\n> +\tx++;\n> +\n> +// GBG\n> +// RGR\n> +// GBG\n> +#define GRBG_BGR888(p, n, div)                                    \\\n> +\t*dst++ = blue_[(prev[x] + next[x]) / (2 * (div))];        \\\n> +\t*dst++ = green_[curr[x] / (div)];                         \\\n> +\t*dst++ = red_[(curr[x - p] + curr[x + n]) / (2 * (div))]; \\\n> +\tx++;\n> +\n> +// GRG\n> +// BGB\n> +// GRG\n> +#define GBRG_BGR888(p, n, div)                                     \\\n> +\t*dst++ = blue_[(curr[x - p] + curr[x + n]) / (2 * (div))]; \\\n> +\t*dst++ = green_[curr[x] / (div)];                          \\\n> +\t*dst++ = red_[(prev[x] + next[x]) / (2 * (div))];          \\\n> +\tx++;\n> +\n> +// BGB\n> +// GRG\n> +// BGB\n> +#define RGGB_BGR888(p, n, div)                                                                 \\\n> +\t*dst++ = blue_[(prev[x - p] + prev[x + n] + next[x - p] + next[x + n]) / (4 * (div))]; \\\n> +\t*dst++ = green_[(prev[x] + curr[x - p] + curr[x + n] + next[x]) / (4 * (div))];        \\\n> +\t*dst++ = red_[curr[x] / (div)];                                                        \\\n> +\tx++;\n> +\n> +void DebayerCpu::debayer10P_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])\n> +{\n> +\tconst int width_in_bytes = window_.width * 5 / 4;\n> +\tconst uint8_t *prev = (const uint8_t *)src[0];\n> +\tconst uint8_t *curr = (const uint8_t *)src[1];\n> +\tconst uint8_t *next = (const uint8_t *)src[2];\n> +\n> +\t/*\n> +\t * For the first pixel getting a pixel from the previous column uses\n> +\t * x - 2 to skip the 5th byte with least-significant bits for 4 pixels.\n> +\t * Same for last pixel (uses x + 2) and looking at the next column.\n> +\t * x++ in the for-loop skips the 5th byte with 4 x 2 lsb-s for 10bit packed.\n> +\t */\n> +\tfor (int x = 0; x < width_in_bytes; x++) {\n> +\t\t/* First pixel */\n> +\t\tBGGR_BGR888(2, 1, 1)\n> +\t\t/* Second pixel BGGR -> GBRG */\n> +\t\tGBRG_BGR888(1, 1, 1)\n> +\t\t/* Same thing for third and fourth pixels */\n> +\t\tBGGR_BGR888(1, 1, 1)\n> +\t\tGBRG_BGR888(1, 2, 1)\n> +\t}\n> +}\n> +\n> +void DebayerCpu::debayer10P_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])\n> +{\n> +\tconst int width_in_bytes = window_.width * 5 / 4;\n> +\tconst uint8_t *prev = (const uint8_t *)src[0];\n> +\tconst uint8_t *curr = (const uint8_t *)src[1];\n> +\tconst uint8_t *next = (const uint8_t *)src[2];\n> +\n> +\tfor (int x = 0; x < width_in_bytes; x++) {\n> +\t\t/* First pixel */\n> +\t\tGRBG_BGR888(2, 1, 1)\n> +\t\t/* Second pixel GRBG -> RGGB */\n> +\t\tRGGB_BGR888(1, 1, 1)\n> +\t\t/* Same thing for third and fourth pixels */\n> +\t\tGRBG_BGR888(1, 1, 1)\n> +\t\tRGGB_BGR888(1, 2, 1)\n> +\t}\n> +}\n> +\n> +void DebayerCpu::debayer10P_GBGB_BGR888(uint8_t *dst, const uint8_t *src[])\n> +{\n> +\tconst int width_in_bytes = window_.width * 5 / 4;\n> +\tconst uint8_t *prev = (const uint8_t *)src[0];\n> +\tconst uint8_t *curr = (const uint8_t *)src[1];\n> +\tconst uint8_t *next = (const uint8_t *)src[2];\n> +\n> +\tfor (int x = 0; x < width_in_bytes; x++) {\n> +\t\t/* Even pixel */\n> +\t\tGBRG_BGR888(2, 1, 1)\n> +\t\t/* Odd pixel GBGR -> BGGR */\n> +\t\tBGGR_BGR888(1, 1, 1)\n> +\t\t/* Same thing for next 2 pixels */\n> +\t\tGBRG_BGR888(1, 1, 1)\n> +\t\tBGGR_BGR888(1, 2, 1)\n> +\t}\n> +}\n> +\n> +void DebayerCpu::debayer10P_RGRG_BGR888(uint8_t *dst, const uint8_t *src[])\n> +{\n> +\tconst int width_in_bytes = window_.width * 5 / 4;\n> +\tconst uint8_t *prev = (const uint8_t *)src[0];\n> +\tconst uint8_t *curr = (const uint8_t *)src[1];\n> +\tconst uint8_t *next = (const uint8_t *)src[2];\n> +\n> +\tfor (int x = 0; x < width_in_bytes; x++) {\n> +\t\t/* Even pixel */\n> +\t\tRGGB_BGR888(2, 1, 1)\n> +\t\t/* Odd pixel RGGB -> GRBG*/\n> +\t\tGRBG_BGR888(1, 1, 1)\n> +\t\t/* Same thing for next 2 pixels */\n> +\t\tRGGB_BGR888(1, 1, 1)\n> +\t\tGRBG_BGR888(1, 2, 1)\n> +\t}\n> +}\n> +\n> +static bool isStandardBayerOrder(BayerFormat::Order order)\n> +{\n> +\treturn order == BayerFormat::BGGR || order == BayerFormat::GBRG ||\n> +\t       order == BayerFormat::GRBG || order == BayerFormat::RGGB;\n> +}\n> +\n> +/*\n> + * Setup the Debayer object according to the passed in parameters.\n> + * Return 0 on success, a negative errno value on failure\n> + * (unsupported parameters).\n> + */\n> +int DebayerCpu::getInputConfig(PixelFormat inputFormat, DebayerInputConfig &config)\n> +{\n> +\tBayerFormat bayerFormat =\n> +\t\tBayerFormat::fromPixelFormat(inputFormat);\n> +\n> +\tif (bayerFormat.bitDepth == 10 &&\n> +\t    bayerFormat.packing == BayerFormat::Packing::CSI2 &&\n> +\t    isStandardBayerOrder(bayerFormat.order)) {\n> +\t\tconfig.bpp = 10;\n> +\t\tconfig.patternSize.width = 4; /* 5 bytes per *4* pixels */\n> +\t\tconfig.patternSize.height = 2;\n> +\t\tconfig.outputFormats = std::vector<PixelFormat>({ formats::RGB888 });\n> +\t\treturn 0;\n> +\t}\n> +\n> +\tLOG(Debayer, Info)\n> +\t\t<< \"Unsupported input format \" << inputFormat.toString();\n> +\treturn -EINVAL;\n> +}\n> +\n> +int DebayerCpu::getOutputConfig(PixelFormat outputFormat, DebayerOutputConfig &config)\n> +{\n> +\tif (outputFormat == formats::RGB888) {\n> +\t\tconfig.bpp = 24;\n> +\t\treturn 0;\n> +\t}\n> +\n> +\tLOG(Debayer, Info)\n> +\t\t<< \"Unsupported output format \" << outputFormat.toString();\n> +\treturn -EINVAL;\n> +}\n> +\n> +/* TODO: this ignores outputFormat since there is only 1 supported outputFormat for now */\n> +int DebayerCpu::setDebayerFunctions(PixelFormat inputFormat, [[maybe_unused]] PixelFormat outputFormat)\n> +{\n> +\tBayerFormat bayerFormat =\n> +\t\tBayerFormat::fromPixelFormat(inputFormat);\n> +\n> +\tif (bayerFormat.bitDepth == 10 &&\n> +\t    bayerFormat.packing == BayerFormat::Packing::CSI2) {\n> +\t\tswitch (bayerFormat.order) {\n> +\t\tcase BayerFormat::BGGR:\n> +\t\t\tdebayer0_ = &DebayerCpu::debayer10P_BGBG_BGR888;\n> +\t\t\tdebayer1_ = &DebayerCpu::debayer10P_GRGR_BGR888;\n> +\t\t\treturn 0;\n> +\t\tcase BayerFormat::GBRG:\n> +\t\t\tdebayer0_ = &DebayerCpu::debayer10P_GBGB_BGR888;\n> +\t\t\tdebayer1_ = &DebayerCpu::debayer10P_RGRG_BGR888;\n> +\t\t\treturn 0;\n> +\t\tcase BayerFormat::GRBG:\n> +\t\t\tdebayer0_ = &DebayerCpu::debayer10P_GRGR_BGR888;\n> +\t\t\tdebayer1_ = &DebayerCpu::debayer10P_BGBG_BGR888;\n> +\t\t\treturn 0;\n> +\t\tcase BayerFormat::RGGB:\n> +\t\t\tdebayer0_ = &DebayerCpu::debayer10P_RGRG_BGR888;\n> +\t\t\tdebayer1_ = &DebayerCpu::debayer10P_GBGB_BGR888;\n> +\t\t\treturn 0;\n> +\t\tdefault:\n> +\t\t\tbreak;\n> +\t\t}\n> +\t}\n> +\n> +\tLOG(Debayer, Error) << \"Unsupported input output format combination\";\n> +\treturn -EINVAL;\n> +}\n> +\n> +int DebayerCpu::configure(const StreamConfiguration &inputCfg,\n> +\t\t\t  const std::vector<std::reference_wrapper<StreamConfiguration>> &outputCfgs)\n> +{\n> +\tif (getInputConfig(inputCfg.pixelFormat, inputConfig_) != 0)\n> +\t\treturn -EINVAL;\n> +\n> +\tif (stats_->configure(inputCfg) != 0)\n> +\t\treturn -EINVAL;\n> +\n> +\tconst Size &stats_pattern_size = stats_->patternSize();\n> +\tif (inputConfig_.patternSize.width != stats_pattern_size.width ||\n> +\t    inputConfig_.patternSize.height != stats_pattern_size.height) {\n> +\t\tLOG(Debayer, Error)\n> +\t\t\t<< \"mismatching stats and debayer pattern sizes for \"\n> +\t\t\t<< inputCfg.pixelFormat.toString();\n> +\t\treturn -EINVAL;\n> +\t}\n> +\n> +\tinputConfig_.stride = inputCfg.stride;\n> +\n> +\tif (outputCfgs.size() != 1) {\n> +\t\tLOG(Debayer, Error)\n> +\t\t\t<< \"Unsupported number of output streams: \"\n> +\t\t\t<< outputCfgs.size();\n> +\t\treturn -EINVAL;\n> +\t}\n> +\n> +\tconst StreamConfiguration &outputCfg = outputCfgs[0];\n> +\tSizeRange outSizeRange = sizes(inputCfg.pixelFormat, inputCfg.size);\n> +\tstd::tie(outputConfig_.stride, outputConfig_.frameSize) =\n> +\t\tstrideAndFrameSize(outputCfg.pixelFormat, outputCfg.size);\n> +\n> +\tif (!outSizeRange.contains(outputCfg.size) || outputConfig_.stride != outputCfg.stride) {\n> +\t\tLOG(Debayer, Error)\n> +\t\t\t<< \"Invalid output size/stride: \"\n> +\t\t\t<< \"\\n  \" << outputCfg.size << \" (\" << outSizeRange << \")\"\n> +\t\t\t<< \"\\n  \" << outputCfg.stride << \" (\" << outputConfig_.stride << \")\";\n> +\t\treturn -EINVAL;\n> +\t}\n> +\n> +\tif (setDebayerFunctions(inputCfg.pixelFormat, outputCfg.pixelFormat) != 0)\n> +\t\treturn -EINVAL;\n> +\n> +\twindow_.x = ((inputCfg.size.width - outputCfg.size.width) / 2) &\n> +\t\t    ~(inputConfig_.patternSize.width - 1);\n> +\twindow_.y = ((inputCfg.size.height - outputCfg.size.height) / 2) &\n> +\t\t    ~(inputConfig_.patternSize.height - 1);\n> +\twindow_.width = outputCfg.size.width;\n> +\twindow_.height = outputCfg.size.height;\n> +\n> +\t/* Don't pass x,y since process() already adjusts src before passing it */\n> +\tstats_->setWindow(Rectangle(window_.size()));\n> +\n> +\t/* pad with patternSize.Width on both left and right side */\n> +\tlineBufferPadding_ = inputConfig_.patternSize.width * inputConfig_.bpp / 8;\n> +\tlineBufferLength_ = window_.width * inputConfig_.bpp / 8 +\n> +\t\t\t    2 * lineBufferPadding_;\n> +\tfor (unsigned int i = 0;\n> +\t     i < (inputConfig_.patternSize.height + 1) && enableInputMemcpy_;\n> +\t     i++) {\n> +\t\tfree(lineBuffers_[i]);\n> +\t\tlineBuffers_[i] = (uint8_t *)malloc(lineBufferLength_);\n> +\t\tif (!lineBuffers_[i])\n> +\t\t\treturn -ENOMEM;\n> +\t}\n> +\n> +\tmeasuredFrames_ = 0;\n> +\tframeProcessTime_ = 0;\n> +\n> +\treturn 0;\n> +}\n> +\n> +/*\n> + * Get width and height at which the bayer-pattern repeats.\n> + * Return pattern-size or an empty Size for an unsupported inputFormat.\n> + */\n> +Size DebayerCpu::patternSize(PixelFormat inputFormat)\n> +{\n> +\tDebayerCpu::DebayerInputConfig config;\n> +\n> +\tif (getInputConfig(inputFormat, config) != 0)\n> +\t\treturn {};\n> +\n> +\treturn config.patternSize;\n> +}\n> +\n> +std::vector<PixelFormat> DebayerCpu::formats(PixelFormat inputFormat)\n> +{\n> +\tDebayerCpu::DebayerInputConfig config;\n> +\n> +\tif (getInputConfig(inputFormat, config) != 0)\n> +\t\treturn std::vector<PixelFormat>();\n> +\n> +\treturn config.outputFormats;\n> +}\n> +\n> +std::tuple<unsigned int, unsigned int>\n> +DebayerCpu::strideAndFrameSize(const PixelFormat &outputFormat, const Size &size)\n> +{\n> +\tDebayerCpu::DebayerOutputConfig config;\n> +\n> +\tif (getOutputConfig(outputFormat, config) != 0)\n> +\t\treturn std::make_tuple(0, 0);\n> +\n> +\t/* round up to multiple of 8 for 64 bits alignment */\n> +\tunsigned int stride = (size.width * config.bpp / 8 + 7) & ~7;\n> +\n> +\treturn std::make_tuple(stride, stride * size.height);\n> +}\n> +\n> +void DebayerCpu::setupInputMemcpy(const uint8_t *linePointers[])\n> +{\n> +\tconst unsigned int patternHeight = inputConfig_.patternSize.height;\n> +\n> +\tif (!enableInputMemcpy_)\n> +\t\treturn;\n> +\n> +\tfor (unsigned int i = 0; i < patternHeight; i++) {\n> +\t\tmemcpy(lineBuffers_[i], linePointers[i + 1] - lineBufferPadding_,\n> +\t\t       lineBufferLength_);\n> +\t\tlinePointers[i + 1] = lineBuffers_[i] + lineBufferPadding_;\n> +\t}\n> +\n> +\t/* Point lineBufferIndex_ to first unused lineBuffer */\n> +\tlineBufferIndex_ = patternHeight;\n> +}\n> +\n> +void DebayerCpu::shiftLinePointers(const uint8_t *linePointers[], const uint8_t *src)\n> +{\n> +\tconst unsigned int patternHeight = inputConfig_.patternSize.height;\n> +\n> +\tfor (unsigned int i = 0; i < patternHeight; i++)\n> +\t\tlinePointers[i] = linePointers[i + 1];\n> +\n> +\tlinePointers[patternHeight] = src +\n> +\t\t\t\t      (patternHeight / 2) * (int)inputConfig_.stride;\n> +}\n> +\n> +void DebayerCpu::memcpyNextLine(const uint8_t *linePointers[])\n> +{\n> +\tconst unsigned int patternHeight = inputConfig_.patternSize.height;\n> +\n> +\tif (!enableInputMemcpy_)\n> +\t\treturn;\n> +\n> +\tmemcpy(lineBuffers_[lineBufferIndex_], linePointers[patternHeight] - lineBufferPadding_,\n> +\t       lineBufferLength_);\n> +\tlinePointers[patternHeight] = lineBuffers_[lineBufferIndex_] + lineBufferPadding_;\n> +\n> +\tlineBufferIndex_ = (lineBufferIndex_ + 1) % (patternHeight + 1);\n> +}\n> +\n> +void DebayerCpu::process2(const uint8_t *src, uint8_t *dst)\n> +{\n> +\tunsigned int y_end = window_.y + window_.height;\n> +\t/* Holds [0] previous- [1] current- [2] next-line */\n> +\tconst uint8_t *linePointers[3];\n> +\n> +\t/* Adjust src to top left corner of the window */\n> +\tsrc += window_.y * inputConfig_.stride + window_.x * inputConfig_.bpp / 8;\n> +\n> +\t/* [x] becomes [x - 1] after initial shiftLinePointers() call */\n> +\tif (window_.y) {\n> +\t\tlinePointers[1] = src - inputConfig_.stride; /* previous-line */\n> +\t\tlinePointers[2] = src;\n> +\t} else {\n> +\t\t/* window_.y == 0, use the next line as prev line */\n> +\t\tlinePointers[1] = src + inputConfig_.stride;\n> +\t\tlinePointers[2] = src;\n> +\t\t/* Last 2 lines also need special handling */\n> +\t\ty_end -= 2;\n> +\t}\n> +\n> +\tsetupInputMemcpy(linePointers);\n> +\n> +\tfor (unsigned int y = window_.y; y < y_end; y += 2) {\n> +\t\tshiftLinePointers(linePointers, src);\n> +\t\tmemcpyNextLine(linePointers);\n> +\t\tstats_->processLine0(y, linePointers);\n> +\t\t(this->*debayer0_)(dst, linePointers);\n> +\t\tsrc += inputConfig_.stride;\n> +\t\tdst += outputConfig_.stride;\n> +\n> +\t\tshiftLinePointers(linePointers, src);\n> +\t\tmemcpyNextLine(linePointers);\n> +\t\t(this->*debayer1_)(dst, linePointers);\n> +\t\tsrc += inputConfig_.stride;\n> +\t\tdst += outputConfig_.stride;\n> +\t}\n> +\n> +\tif (window_.y == 0) {\n> +\t\tshiftLinePointers(linePointers, src);\n> +\t\tmemcpyNextLine(linePointers);\n> +\t\tstats_->processLine0(y_end, linePointers);\n> +\t\t(this->*debayer0_)(dst, linePointers);\n> +\t\tsrc += inputConfig_.stride;\n> +\t\tdst += outputConfig_.stride;\n> +\n> +\t\tshiftLinePointers(linePointers, src);\n> +\t\t/* next line may point outside of src, use prev. */\n> +\t\tlinePointers[2] = linePointers[0];\n> +\t\t(this->*debayer1_)(dst, linePointers);\n> +\t\tsrc += inputConfig_.stride;\n> +\t\tdst += outputConfig_.stride;\n> +\t}\n> +}\n> +\n> +void DebayerCpu::process4(const uint8_t *src, uint8_t *dst)\n> +{\n> +\tconst unsigned int y_end = window_.y + window_.height;\n> +\t/*\n> +\t * This holds pointers to [0] 2-lines-up [1] 1-line-up [2] current-line\n> +\t * [3] 1-line-down [4] 2-lines-down.\n> +\t */\n> +\tconst uint8_t *linePointers[5];\n> +\n> +\t/* Adjust src to top left corner of the window */\n> +\tsrc += window_.y * inputConfig_.stride + window_.x * inputConfig_.bpp / 8;\n> +\n> +\t/* [x] becomes [x - 1] after initial shiftLinePointers() call */\n> +\tlinePointers[1] = src - 2 * inputConfig_.stride;\n> +\tlinePointers[2] = src - inputConfig_.stride;\n> +\tlinePointers[3] = src;\n> +\tlinePointers[4] = src + inputConfig_.stride;\n> +\n> +\tsetupInputMemcpy(linePointers);\n> +\n> +\tfor (unsigned int y = window_.y; y < y_end; y += 4) {\n> +\t\tshiftLinePointers(linePointers, src);\n> +\t\tmemcpyNextLine(linePointers);\n> +\t\tstats_->processLine0(y, linePointers);\n> +\t\t(this->*debayer0_)(dst, linePointers);\n> +\t\tsrc += inputConfig_.stride;\n> +\t\tdst += outputConfig_.stride;\n> +\n> +\t\tshiftLinePointers(linePointers, src);\n> +\t\tmemcpyNextLine(linePointers);\n> +\t\t(this->*debayer1_)(dst, linePointers);\n> +\t\tsrc += inputConfig_.stride;\n> +\t\tdst += outputConfig_.stride;\n> +\n> +\t\tshiftLinePointers(linePointers, src);\n> +\t\tmemcpyNextLine(linePointers);\n> +\t\tstats_->processLine2(y, linePointers);\n> +\t\t(this->*debayer2_)(dst, linePointers);\n> +\t\tsrc += inputConfig_.stride;\n> +\t\tdst += outputConfig_.stride;\n> +\n> +\t\tshiftLinePointers(linePointers, src);\n> +\t\tmemcpyNextLine(linePointers);\n> +\t\t(this->*debayer3_)(dst, linePointers);\n> +\t\tsrc += inputConfig_.stride;\n> +\t\tdst += outputConfig_.stride;\n> +\t}\n> +}\n> +\n> +static inline int64_t timeDiff(timespec &after, timespec &before)\n> +{\n> +\treturn (after.tv_sec - before.tv_sec) * 1000000000LL +\n> +\t       (int64_t)after.tv_nsec - (int64_t)before.tv_nsec;\n> +}\n> +\n> +void DebayerCpu::process(FrameBuffer *input, FrameBuffer *output, DebayerParams params)\n> +{\n> +\ttimespec frameStartTime;\n> +\n> +\tif (measuredFrames_ < DebayerCpu::kLastFrameToMeasure) {\n> +\t\tframeStartTime = {};\n> +\t\tclock_gettime(CLOCK_MONOTONIC_RAW, &frameStartTime);\n> +\t}\n> +\n> +\t/* Apply DebayerParams */\n> +\tif (params.gamma != gamma_correction_) {\n> +\t\tfor (unsigned int i = 0; i < kGammaLookupSize; i++)\n> +\t\t\tgamma_[i] = UINT8_MAX * powf(i / (kGammaLookupSize - 1.0), params.gamma);\n> +\n> +\t\tgamma_correction_ = params.gamma;\n> +\t}\n> +\n> +\tfor (unsigned int i = 0; i < kRGBLookupSize; i++) {\n> +\t\tconstexpr unsigned int div =\n> +\t\t\tkRGBLookupSize * DebayerParams::kGain10 / kGammaLookupSize;\n> +\t\tunsigned int idx;\n> +\n> +\t\t/* Apply gamma after gain! */\n> +\t\tidx = std::min({ i * params.gainR / div, (kGammaLookupSize - 1) });\n> +\t\tred_[i] = gamma_[idx];\n> +\n> +\t\tidx = std::min({ i * params.gainG / div, (kGammaLookupSize - 1) });\n> +\t\tgreen_[i] = gamma_[idx];\n> +\n> +\t\tidx = std::min({ i * params.gainB / div, (kGammaLookupSize - 1) });\n> +\t\tblue_[i] = gamma_[idx];\n> +\t}\n> +\n> +\t/* Copy metadata from the input buffer */\n> +\tFrameMetadata &metadata = output->_d()->metadata();\n> +\tmetadata.status = input->metadata().status;\n> +\tmetadata.sequence = input->metadata().sequence;\n> +\tmetadata.timestamp = input->metadata().timestamp;\n> +\n> +\tMappedFrameBuffer in(input, MappedFrameBuffer::MapFlag::Read);\n> +\tMappedFrameBuffer out(output, MappedFrameBuffer::MapFlag::Write);\n> +\tif (!in.isValid() || !out.isValid()) {\n> +\t\tLOG(Debayer, Error) << \"mmap-ing buffer(s) failed\";\n> +\t\tmetadata.status = FrameMetadata::FrameError;\n> +\t\treturn;\n> +\t}\n> +\n> +\tstats_->startFrame();\n> +\n> +\tif (inputConfig_.patternSize.height == 2)\n> +\t\tprocess2(in.planes()[0].data(), out.planes()[0].data());\n> +\telse\n> +\t\tprocess4(in.planes()[0].data(), out.planes()[0].data());\n> +\n> +\tmetadata.planes()[0].bytesused = out.planes()[0].size();\n> +\n> +\t/* Measure before emitting signals */\n> +\tif (measuredFrames_ < DebayerCpu::kLastFrameToMeasure &&\n> +\t    ++measuredFrames_ > DebayerCpu::kFramesToSkip) {\n> +\t\ttimespec frameEndTime = {};\n> +\t\tclock_gettime(CLOCK_MONOTONIC_RAW, &frameEndTime);\n> +\t\tframeProcessTime_ += timeDiff(frameEndTime, frameStartTime);\n> +\t\tif (measuredFrames_ == DebayerCpu::kLastFrameToMeasure) {\n> +\t\t\tconst unsigned int measuredFrames = DebayerCpu::kLastFrameToMeasure -\n> +\t\t\t\t\t\t\t    DebayerCpu::kFramesToSkip;\n> +\t\t\tLOG(Debayer, Info)\n> +\t\t\t\t<< \"Processed \" << measuredFrames\n> +\t\t\t\t<< \" frames in \" << frameProcessTime_ / 1000 << \"us, \"\n> +\t\t\t\t<< frameProcessTime_ / (1000 * measuredFrames)\n> +\t\t\t\t<< \" us/frame\";\n> +\t\t}\n> +\t}\n> +\n> +\tstats_->finishFrame();\n> +\toutputBufferReady.emit(output);\n> +\tinputBufferReady.emit(input);\n> +}\n> +\n> +SizeRange DebayerCpu::sizes(PixelFormat inputFormat, const Size &inputSize)\n> +{\n> +\tSize pattern_size = patternSize(inputFormat);\n> +\tunsigned int border_height = pattern_size.height;\n> +\n> +\tif (pattern_size.isNull())\n> +\t\treturn {};\n> +\n> +\t/* No need for top/bottom border with a pattern height of 2 */\n> +\tif (pattern_size.height == 2)\n> +\t\tborder_height = 0;\n> +\n> +\t/*\n> +\t * For debayer interpolation a border is kept around the entire image\n> +\t * and the minimum output size is pattern-height x pattern-width.\n> +\t */\n> +\tif (inputSize.width < (3 * pattern_size.width) ||\n> +\t    inputSize.height < (2 * border_height + pattern_size.height)) {\n> +\t\tLOG(Debayer, Warning)\n> +\t\t\t<< \"Input format size too small: \" << inputSize.toString();\n> +\t\treturn {};\n> +\t}\n> +\n> +\treturn SizeRange(Size(pattern_size.width, pattern_size.height),\n> +\t\t\t Size((inputSize.width - 2 * pattern_size.width) & ~(pattern_size.width - 1),\n> +\t\t\t      (inputSize.height - 2 * border_height) & ~(pattern_size.height - 1)),\n> +\t\t\t pattern_size.width, pattern_size.height);\n> +}\n> +\n> +} /* namespace libcamera */\n> diff --git a/src/libcamera/software_isp/debayer_cpu.h b/src/libcamera/software_isp/debayer_cpu.h\n> new file mode 100644\n> index 00000000..e0c4b9a8\n> --- /dev/null\n> +++ b/src/libcamera/software_isp/debayer_cpu.h\n> @@ -0,0 +1,143 @@\n> +/* SPDX-License-Identifier: LGPL-2.1-or-later */\n> +/*\n> + * Copyright (C) 2023, Linaro Ltd\n> + * Copyright (C) 2023, Red Hat Inc.\n> + *\n> + * Authors:\n> + * Hans de Goede <hdegoede@redhat.com>\n> + *\n> + * debayer_cpu.h - CPU based debayering header\n> + */\n> +\n> +#pragma once\n> +\n> +#include <memory>\n> +#include <stdint.h>\n> +#include <vector>\n> +\n> +#include <libcamera/base/object.h>\n> +\n> +#include \"debayer.h\"\n> +#include \"swstats_cpu.h\"\n> +\n> +namespace libcamera {\n> +\n> +class DebayerCpu : public Debayer, public Object\n> +{\n> +public:\n> +\tDebayerCpu(std::unique_ptr<SwStatsCpu> stats);\n> +\t~DebayerCpu();\n> +\n> +\tint configure(const StreamConfiguration &inputCfg,\n> +\t\t      const std::vector<std::reference_wrapper<StreamConfiguration>> &outputCfgs);\n> +\tSize patternSize(PixelFormat inputFormat);\n> +\tstd::vector<PixelFormat> formats(PixelFormat input);\n> +\tstd::tuple<unsigned int, unsigned int>\n> +\tstrideAndFrameSize(const PixelFormat &outputFormat, const Size &size);\n> +\tvoid process(FrameBuffer *input, FrameBuffer *output, DebayerParams params);\n> +\tSizeRange sizes(PixelFormat inputFormat, const Size &inputSize);\n> +\n> +\t/**\n> +\t * \\brief Get the file descriptor for the statistics.\n> +\t *\n> +\t * \\return the file descriptor pointing to the statistics.\n> +\t */\n> +\tconst SharedFD &getStatsFD() { return stats_->getStatsFD(); }\n> +\n> +\t/**\n> +\t * \\brief Get the output frame size.\n> +\t *\n> +\t * \\return The output frame size.\n> +\t */\n> +\tunsigned int frameSize() { return outputConfig_.frameSize; }\n> +\n> +private:\n> +\t/**\n> +\t * \\brief Called to debayer 1 line of Bayer input data to output format\n> +\t * \\param[out] dst Pointer to the start of the output line to write\n> +\t * \\param[in] src The input data\n> +\t *\n> +\t * Input data is an array of (patternSize_.height + 1) src\n> +\t * pointers each pointing to a line in the Bayer source. The middle\n> +\t * element of the array will point to the actual line being processed.\n> +\t * Earlier element(s) will point to the previous line(s) and later\n> +\t * element(s) to the next line(s).\n> +\t *\n> +\t * These functions take an array of src pointers, rather then\n> +\t * a single src pointer + a stride for the source, so that when the src\n> +\t * is slow uncached memory it can be copied to faster memory before\n> +\t * debayering. Debayering a standard 2x2 Bayer pattern requires access\n> +\t * to the previous and next src lines for interpolating the missing\n> +\t * colors. To allow copying the src lines only once 3 buffers each\n> +\t * holding a single line are used, re-using the oldest buffer for\n> +\t * the next line and the pointers are swizzled so that:\n> +\t * src[0] = previous-line, src[1] = currrent-line, src[2] = next-line.\n> +\t * This way the 3 pointers passed to the debayer functions form\n> +\t * a sliding window over the src avoiding the need to copy each\n> +\t * line more then once.\n> +\t *\n> +\t * Similarly for bayer patterns which repeat every 4 lines, 5 src\n> +\t * pointers are passed holding: src[0] = 2-lines-up, src[1] = 1-line-up\n> +\t * src[2] = current-line, src[3] = 1-line-down, src[4] = 2-lines-down.\n> +\t */\n> +\ttypedef void (DebayerCpu::*debayerFn)(uint8_t *dst, const uint8_t *src[]);\n> +\n> +\t/* CSI-2 packed 10-bit raw bayer format (all the 4 orders) */\n> +\tvoid debayer10P_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]);\n> +\tvoid debayer10P_GRGR_BGR888(uint8_t *dst, const uint8_t *src[]);\n> +\tvoid debayer10P_GBGB_BGR888(uint8_t *dst, const uint8_t *src[]);\n> +\tvoid debayer10P_RGRG_BGR888(uint8_t *dst, const uint8_t *src[]);\n> +\n> +\tstruct DebayerInputConfig {\n> +\t\tSize patternSize;\n> +\t\tunsigned int bpp; /* Memory used per pixel, not precision */\n> +\t\tunsigned int stride;\n> +\t\tstd::vector<PixelFormat> outputFormats;\n> +\t};\n> +\n> +\tstruct DebayerOutputConfig {\n> +\t\tunsigned int bpp; /* Memory used per pixel, not precision */\n> +\t\tunsigned int stride;\n> +\t\tunsigned int frameSize;\n> +\t};\n> +\n> +\tint getInputConfig(PixelFormat inputFormat, DebayerInputConfig &config);\n> +\tint getOutputConfig(PixelFormat outputFormat, DebayerOutputConfig &config);\n> +\tint setDebayerFunctions(PixelFormat inputFormat, PixelFormat outputFormat);\n> +\tvoid setupInputMemcpy(const uint8_t *linePointers[]);\n> +\tvoid shiftLinePointers(const uint8_t *linePointers[], const uint8_t *src);\n> +\tvoid memcpyNextLine(const uint8_t *linePointers[]);\n> +\tvoid process2(const uint8_t *src, uint8_t *dst);\n> +\tvoid process4(const uint8_t *src, uint8_t *dst);\n> +\n> +\tstatic constexpr unsigned int kGammaLookupSize = 1024;\n> +\tstatic constexpr unsigned int kRGBLookupSize = 256;\n> +\t/* Max. supported Bayer pattern height is 4, debayering this requires 5 lines */\n> +\tstatic constexpr unsigned int kMaxLineBuffers = 5;\n> +\n> +\tstd::array<uint8_t, kGammaLookupSize> gamma_;\n> +\tstd::array<uint8_t, kRGBLookupSize> red_;\n> +\tstd::array<uint8_t, kRGBLookupSize> green_;\n> +\tstd::array<uint8_t, kRGBLookupSize> blue_;\n> +\tdebayerFn debayer0_;\n> +\tdebayerFn debayer1_;\n> +\tdebayerFn debayer2_;\n> +\tdebayerFn debayer3_;\n> +\tRectangle window_;\n> +\tDebayerInputConfig inputConfig_;\n> +\tDebayerOutputConfig outputConfig_;\n> +\tstd::unique_ptr<SwStatsCpu> stats_;\n> +\tuint8_t *lineBuffers_[kMaxLineBuffers];\n> +\tunsigned int lineBufferLength_;\n> +\tunsigned int lineBufferPadding_;\n> +\tunsigned int lineBufferIndex_;\n> +\tbool enableInputMemcpy_;\n> +\tfloat gamma_correction_;\n> +\tunsigned int measuredFrames_;\n> +\tint64_t frameProcessTime_;\n> +\t/* Skip 30 frames for things to stabilize then measure 30 frames */\n> +\tstatic constexpr unsigned int kFramesToSkip = 30;\n> +\tstatic constexpr unsigned int kLastFrameToMeasure = 60;\n> +};\n> +\n> +} /* namespace libcamera */\n> diff --git a/src/libcamera/software_isp/meson.build b/src/libcamera/software_isp/meson.build\n> index 92fc90f3..dd5e70c1 100644\n> --- a/src/libcamera/software_isp/meson.build\n> +++ b/src/libcamera/software_isp/meson.build\n> @@ -2,5 +2,6 @@\n>   \n>   libcamera_sources += files([\n>       'debayer.cpp',\n> +    'debayer_cpu.cpp',\n>       'swstats_cpu.cpp',\n>   ])","headers":{"Return-Path":"<libcamera-devel-bounces@lists.libcamera.org>","X-Original-To":"parsemail@patchwork.libcamera.org","Delivered-To":"parsemail@patchwork.libcamera.org","Received":["from lancelot.ideasonboard.com (lancelot.ideasonboard.com\n\t[92.243.16.209])\n\tby patchwork.libcamera.org (Postfix) with ESMTPS id D79E5BF415\n\tfor <parsemail@patchwork.libcamera.org>;\n\tMon, 19 Feb 2024 17:09:14 +0000 (UTC)","from lancelot.ideasonboard.com (localhost [IPv6:::1])\n\tby lancelot.ideasonboard.com (Postfix) with ESMTP id 0B65862807;\n\tMon, 19 Feb 2024 18:09:14 +0100 (CET)","from perceval.ideasonboard.com (perceval.ideasonboard.com\n\t[213.167.242.64])\n\tby lancelot.ideasonboard.com (Postfix) with ESMTPS id C6F1661C9E\n\tfor <libcamera-devel@lists.libcamera.org>;\n\tMon, 19 Feb 2024 18:09:12 +0100 (CET)","from [IPV6:2a00:6020:448c:6c00:3d54:470d:90a7:2302] (unknown\n\t[IPv6:2a00:6020:448c:6c00:3d54:470d:90a7:2302])\n\tby perceval.ideasonboard.com (Postfix) with ESMTPSA id C1E6E183;\n\tMon, 19 Feb 2024 18:09:05 +0100 (CET)"],"Authentication-Results":"lancelot.ideasonboard.com;\n\tdkim=fail reason=\"signature verification failed\" (1024-bit key;\n\tunprotected) header.d=ideasonboard.com header.i=@ideasonboard.com\n\theader.b=\"Uiq2Ff+l\"; dkim-atps=neutral","DKIM-Signature":"v=1; a=rsa-sha256; c=relaxed/simple; d=ideasonboard.com;\n\ts=mail; t=1708362546;\n\tbh=fvlv4nF/mqqT5e9mD+HJGqkrUcxycATrUzpXzdNWnb8=;\n\th=Date:Subject:To:Cc:References:From:In-Reply-To:From;\n\tb=Uiq2Ff+lViB4rPKIDEIxDcen9r9AlhVaXmgsnKAsV8lpQAqGGRNo9uh33jWSbmIii\n\tW2FEW3wVeh70C3jcCwaHWl7BX0ofIq7k6TceuKSB/fT/0EaXrBiTiqhjLurOXUrfWL\n\tF8eiBxCd6Qxcz42Y4lV3i++tuX4hNbAhMolUbESE=","Content-Type":"multipart/mixed;\n\tboundary=\"------------E0i5CmHDn4k3FkVJleUcn20Q\"","Message-ID":"<659d6c77-e8f3-4d8a-abeb-95c791c42662@ideasonboard.com>","Date":"Mon, 19 Feb 2024 18:09:08 +0100","MIME-Version":"1.0","User-Agent":"Mozilla Thunderbird","Subject":"Re: [PATCH v3 08/16] libcamera: software_isp: Add DebayerCpu class","To":"Hans de Goede <hdegoede@redhat.com>, libcamera-devel@lists.libcamera.org","References":"<20240214170122.60754-1-hdegoede@redhat.com>\n\t<20240214170122.60754-9-hdegoede@redhat.com>","Content-Language":"en-US","From":"Stefan Klug <stefan.klug@ideasonboard.com>","In-Reply-To":"<20240214170122.60754-9-hdegoede@redhat.com>","X-BeenThere":"libcamera-devel@lists.libcamera.org","X-Mailman-Version":"2.1.29","Precedence":"list","List-Id":"<libcamera-devel.lists.libcamera.org>","List-Unsubscribe":"<https://lists.libcamera.org/options/libcamera-devel>,\n\t<mailto:libcamera-devel-request@lists.libcamera.org?subject=unsubscribe>","List-Archive":"<https://lists.libcamera.org/pipermail/libcamera-devel/>","List-Post":"<mailto:libcamera-devel@lists.libcamera.org>","List-Help":"<mailto:libcamera-devel-request@lists.libcamera.org?subject=help>","List-Subscribe":"<https://lists.libcamera.org/listinfo/libcamera-devel>,\n\t<mailto:libcamera-devel-request@lists.libcamera.org?subject=subscribe>","Cc":"Bryan O'Donoghue <bryan.odonoghue@linaro.org>,\n\tMaxime Ripard <mripard@redhat.com>, Pavel Machek <pavel@ucw.cz>,\n\tDennis Bonke <admin@dennisbonke.com>","Errors-To":"libcamera-devel-bounces@lists.libcamera.org","Sender":"\"libcamera-devel\" <libcamera-devel-bounces@lists.libcamera.org>"}},{"id":28700,"web_url":"https://patchwork.libcamera.org/comment/28700/","msgid":"<731614fe-d28f-4049-97ca-e7d637df893a@gmail.com>","date":"2024-02-20T10:54:45","subject":"Re: [PATCH v3 08/16] libcamera: software_isp: Add DebayerCpu class","submitter":{"id":179,"url":"https://patchwork.libcamera.org/api/people/179/","name":"Andrei Konovalov","email":"andrey.konovalov.ynk@gmail.com"},"content":"Hi Stefan,\n\nOn 19.02.2024 20:09, Stefan Klug wrote:\n> Hi Hans,\n> \n> thanks for your work on the SoftISP.\n> \n> Am 14.02.24 um 18:01 schrieb Hans de Goede:\n>> Add CPU based debayering implementation. This initial implementation\n>> only supports debayering packed 10 bits per pixel bayer data in\n>> the 4 standard bayer orders.\n>>\n>> Doxygen documentation by Dennis Bonke.\n>>\n>> Tested-by: Bryan O'Donoghue <bryan.odonoghue@linaro.org> # sc8280xp Lenovo x13s\n>> Tested-by: Pavel Machek <pavel@ucw.cz>\n>> Reviewed-by: Pavel Machek <pavel@ucw.cz>\n>> Co-developed-by: Dennis Bonke <admin@dennisbonke.com>\n>> Signed-off-by: Dennis Bonke <admin@dennisbonke.com>\n>> Co-developed-by: Andrey Konovalov <andrey.konovalov@linaro.org>\n>> Signed-off-by: Andrey Konovalov <andrey.konovalov@linaro.org>\n>> Co-developed-by: Pavel Machek <pavel@ucw.cz>\n>> Signed-off-by: Pavel Machek <pavel@ucw.cz>\n>> Signed-off-by: Hans de Goede <hdegoede@redhat.com>\n>> ---\n>> Changes in v3:\n>> - Move debayer_cpu.h to src/libcamera/software_isp/\n>> - Move documentation to .cpp file\n>> - Document how/why an array of src pointers is passed to\n>>    the debayer functions\n>> ---\n>>   src/libcamera/software_isp/debayer_cpu.cpp | 619 +++++++++++++++++++++\n>>   src/libcamera/software_isp/debayer_cpu.h   | 143 +++++\n>>   src/libcamera/software_isp/meson.build     |   1 +\n>>   3 files changed, 763 insertions(+)\n>>   create mode 100644 src/libcamera/software_isp/debayer_cpu.cpp\n>>   create mode 100644 src/libcamera/software_isp/debayer_cpu.h\n>>\n>> diff --git a/src/libcamera/software_isp/debayer_cpu.cpp b/src/libcamera/software_isp/debayer_cpu.cpp\n>> new file mode 100644\n>> index 00000000..53e90776\n>> --- /dev/null\n>> +++ b/src/libcamera/software_isp/debayer_cpu.cpp\n>> @@ -0,0 +1,619 @@\n>> +/* SPDX-License-Identifier: LGPL-2.1-or-later */\n>> +/*\n>> + * Copyright (C) 2023, Linaro Ltd\n>> + * Copyright (C) 2023, Red Hat Inc.\n>> + *\n>> + * Authors:\n>> + * Hans de Goede <hdegoede@redhat.com>\n>> + *\n>> + * debayer_cpu.cpp - CPU based debayering class\n>> + */\n>> +\n>> +#include \"debayer_cpu.h\"\n>> +\n>> +#include <math.h>\n>> +#include <stdlib.h>\n>> +#include <time.h>\n>> +\n>> +#include <libcamera/formats.h>\n>> +\n>> +#include \"libcamera/internal/bayer_format.h\"\n>> +#include \"libcamera/internal/framebuffer.h\"\n>> +#include \"libcamera/internal/mapped_framebuffer.h\"\n>> +\n>> +namespace libcamera {\n>> +\n>> +/**\n>> + * \\class DebayerCpu\n>> + * \\brief Class for debayering on the CPU\n>> + *\n>> + * Implementation for CPU based debayering\n>> + */\n>> +\n>> +/**\n>> + * \\brief Constructs a DebayerCpu object.\n>> + * \\param[in] stats Pointer to the stats object to use.\n>> + */\n>> +DebayerCpu::DebayerCpu(std::unique_ptr<SwStatsCpu> stats)\n> \n> I tried to use the ISP without statistics/regulation altogether and just set stats to a nullptr. This fails in a few places. \n> IMHO it would improve flexibility & reusability to allow that.\n> Attched is a patch with the modifications as I needed them anyways. Feel free to include them.\n\nYour patch itself looks OK for me (I haven't tested it though).\n\nBut I am curious what is the reason for not using stats?\nLeaving the debayer step only, one looses AWB (which doesn't need any particular\nsupport from the hardware and prevents the typical raw bayer green tint) and AE/AGC (which only\nneeds at least one of the two camera sensor controls, and with wrong exposure the\nimage may loose the information, and this can't be fully compensated by post processing).\n\nThanks,\nAndrei\n\n> Cheers Stefan\n> \n>> +    : stats_(std::move(stats)), gamma_correction_(1.0)\n>> +{\n>> +#ifdef __x86_64__\n>> +    enableInputMemcpy_ = false;\n>> +#else\n>> +    enableInputMemcpy_ = true;\n>> +#endif\n>> +    /* Initialize gamma to 1.0 curve */\n>> +    for (unsigned int i = 0; i < kGammaLookupSize; i++)\n>> +        gamma_[i] = i / 4;\n>> +\n>> +    for (unsigned int i = 0; i < kMaxLineBuffers; i++)\n>> +        lineBuffers_[i] = nullptr;\n>> +}\n>> +\n>> +DebayerCpu::~DebayerCpu()\n>> +{\n>> +    for (unsigned int i = 0; i < kMaxLineBuffers; i++)\n>> +        free(lineBuffers_[i]);\n>> +}\n>> +\n>> +// RGR\n>> +// GBG\n>> +// RGR\n>> +#define BGGR_BGR888(p, n, div)                                                                \\\n>> +    *dst++ = blue_[curr[x] / (div)];                                                      \\\n>> +    *dst++ = green_[(prev[x] + curr[x - p] + curr[x + n] + next[x]) / (4 * (div))];       \\\n>> +    *dst++ = red_[(prev[x - p] + prev[x + n] + next[x - p] + next[x + n]) / (4 * (div))]; \\\n>> +    x++;\n>> +\n>> +// GBG\n>> +// RGR\n>> +// GBG\n>> +#define GRBG_BGR888(p, n, div)                                    \\\n>> +    *dst++ = blue_[(prev[x] + next[x]) / (2 * (div))];        \\\n>> +    *dst++ = green_[curr[x] / (div)];                         \\\n>> +    *dst++ = red_[(curr[x - p] + curr[x + n]) / (2 * (div))]; \\\n>> +    x++;\n>> +\n>> +// GRG\n>> +// BGB\n>> +// GRG\n>> +#define GBRG_BGR888(p, n, div)                                     \\\n>> +    *dst++ = blue_[(curr[x - p] + curr[x + n]) / (2 * (div))]; \\\n>> +    *dst++ = green_[curr[x] / (div)];                          \\\n>> +    *dst++ = red_[(prev[x] + next[x]) / (2 * (div))];          \\\n>> +    x++;\n>> +\n>> +// BGB\n>> +// GRG\n>> +// BGB\n>> +#define RGGB_BGR888(p, n, div)                                                                 \\\n>> +    *dst++ = blue_[(prev[x - p] + prev[x + n] + next[x - p] + next[x + n]) / (4 * (div))]; \\\n>> +    *dst++ = green_[(prev[x] + curr[x - p] + curr[x + n] + next[x]) / (4 * (div))];        \\\n>> +    *dst++ = red_[curr[x] / (div)];                                                        \\\n>> +    x++;\n>> +\n>> +void DebayerCpu::debayer10P_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])\n>> +{\n>> +    const int width_in_bytes = window_.width * 5 / 4;\n>> +    const uint8_t *prev = (const uint8_t *)src[0];\n>> +    const uint8_t *curr = (const uint8_t *)src[1];\n>> +    const uint8_t *next = (const uint8_t *)src[2];\n>> +\n>> +    /*\n>> +     * For the first pixel getting a pixel from the previous column uses\n>> +     * x - 2 to skip the 5th byte with least-significant bits for 4 pixels.\n>> +     * Same for last pixel (uses x + 2) and looking at the next column.\n>> +     * x++ in the for-loop skips the 5th byte with 4 x 2 lsb-s for 10bit packed.\n>> +     */\n>> +    for (int x = 0; x < width_in_bytes; x++) {\n>> +        /* First pixel */\n>> +        BGGR_BGR888(2, 1, 1)\n>> +        /* Second pixel BGGR -> GBRG */\n>> +        GBRG_BGR888(1, 1, 1)\n>> +        /* Same thing for third and fourth pixels */\n>> +        BGGR_BGR888(1, 1, 1)\n>> +        GBRG_BGR888(1, 2, 1)\n>> +    }\n>> +}\n>> +\n>> +void DebayerCpu::debayer10P_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])\n>> +{\n>> +    const int width_in_bytes = window_.width * 5 / 4;\n>> +    const uint8_t *prev = (const uint8_t *)src[0];\n>> +    const uint8_t *curr = (const uint8_t *)src[1];\n>> +    const uint8_t *next = (const uint8_t *)src[2];\n>> +\n>> +    for (int x = 0; x < width_in_bytes; x++) {\n>> +        /* First pixel */\n>> +        GRBG_BGR888(2, 1, 1)\n>> +        /* Second pixel GRBG -> RGGB */\n>> +        RGGB_BGR888(1, 1, 1)\n>> +        /* Same thing for third and fourth pixels */\n>> +        GRBG_BGR888(1, 1, 1)\n>> +        RGGB_BGR888(1, 2, 1)\n>> +    }\n>> +}\n>> +\n>> +void DebayerCpu::debayer10P_GBGB_BGR888(uint8_t *dst, const uint8_t *src[])\n>> +{\n>> +    const int width_in_bytes = window_.width * 5 / 4;\n>> +    const uint8_t *prev = (const uint8_t *)src[0];\n>> +    const uint8_t *curr = (const uint8_t *)src[1];\n>> +    const uint8_t *next = (const uint8_t *)src[2];\n>> +\n>> +    for (int x = 0; x < width_in_bytes; x++) {\n>> +        /* Even pixel */\n>> +        GBRG_BGR888(2, 1, 1)\n>> +        /* Odd pixel GBGR -> BGGR */\n>> +        BGGR_BGR888(1, 1, 1)\n>> +        /* Same thing for next 2 pixels */\n>> +        GBRG_BGR888(1, 1, 1)\n>> +        BGGR_BGR888(1, 2, 1)\n>> +    }\n>> +}\n>> +\n>> +void DebayerCpu::debayer10P_RGRG_BGR888(uint8_t *dst, const uint8_t *src[])\n>> +{\n>> +    const int width_in_bytes = window_.width * 5 / 4;\n>> +    const uint8_t *prev = (const uint8_t *)src[0];\n>> +    const uint8_t *curr = (const uint8_t *)src[1];\n>> +    const uint8_t *next = (const uint8_t *)src[2];\n>> +\n>> +    for (int x = 0; x < width_in_bytes; x++) {\n>> +        /* Even pixel */\n>> +        RGGB_BGR888(2, 1, 1)\n>> +        /* Odd pixel RGGB -> GRBG*/\n>> +        GRBG_BGR888(1, 1, 1)\n>> +        /* Same thing for next 2 pixels */\n>> +        RGGB_BGR888(1, 1, 1)\n>> +        GRBG_BGR888(1, 2, 1)\n>> +    }\n>> +}\n>> +\n>> +static bool isStandardBayerOrder(BayerFormat::Order order)\n>> +{\n>> +    return order == BayerFormat::BGGR || order == BayerFormat::GBRG ||\n>> +           order == BayerFormat::GRBG || order == BayerFormat::RGGB;\n>> +}\n>> +\n>> +/*\n>> + * Setup the Debayer object according to the passed in parameters.\n>> + * Return 0 on success, a negative errno value on failure\n>> + * (unsupported parameters).\n>> + */\n>> +int DebayerCpu::getInputConfig(PixelFormat inputFormat, DebayerInputConfig &config)\n>> +{\n>> +    BayerFormat bayerFormat =\n>> +        BayerFormat::fromPixelFormat(inputFormat);\n>> +\n>> +    if (bayerFormat.bitDepth == 10 &&\n>> +        bayerFormat.packing == BayerFormat::Packing::CSI2 &&\n>> +        isStandardBayerOrder(bayerFormat.order)) {\n>> +        config.bpp = 10;\n>> +        config.patternSize.width = 4; /* 5 bytes per *4* pixels */\n>> +        config.patternSize.height = 2;\n>> +        config.outputFormats = std::vector<PixelFormat>({ formats::RGB888 });\n>> +        return 0;\n>> +    }\n>> +\n>> +    LOG(Debayer, Info)\n>> +        << \"Unsupported input format \" << inputFormat.toString();\n>> +    return -EINVAL;\n>> +}\n>> +\n>> +int DebayerCpu::getOutputConfig(PixelFormat outputFormat, DebayerOutputConfig &config)\n>> +{\n>> +    if (outputFormat == formats::RGB888) {\n>> +        config.bpp = 24;\n>> +        return 0;\n>> +    }\n>> +\n>> +    LOG(Debayer, Info)\n>> +        << \"Unsupported output format \" << outputFormat.toString();\n>> +    return -EINVAL;\n>> +}\n>> +\n>> +/* TODO: this ignores outputFormat since there is only 1 supported outputFormat for now */\n>> +int DebayerCpu::setDebayerFunctions(PixelFormat inputFormat, [[maybe_unused]] PixelFormat outputFormat)\n>> +{\n>> +    BayerFormat bayerFormat =\n>> +        BayerFormat::fromPixelFormat(inputFormat);\n>> +\n>> +    if (bayerFormat.bitDepth == 10 &&\n>> +        bayerFormat.packing == BayerFormat::Packing::CSI2) {\n>> +        switch (bayerFormat.order) {\n>> +        case BayerFormat::BGGR:\n>> +            debayer0_ = &DebayerCpu::debayer10P_BGBG_BGR888;\n>> +            debayer1_ = &DebayerCpu::debayer10P_GRGR_BGR888;\n>> +            return 0;\n>> +        case BayerFormat::GBRG:\n>> +            debayer0_ = &DebayerCpu::debayer10P_GBGB_BGR888;\n>> +            debayer1_ = &DebayerCpu::debayer10P_RGRG_BGR888;\n>> +            return 0;\n>> +        case BayerFormat::GRBG:\n>> +            debayer0_ = &DebayerCpu::debayer10P_GRGR_BGR888;\n>> +            debayer1_ = &DebayerCpu::debayer10P_BGBG_BGR888;\n>> +            return 0;\n>> +        case BayerFormat::RGGB:\n>> +            debayer0_ = &DebayerCpu::debayer10P_RGRG_BGR888;\n>> +            debayer1_ = &DebayerCpu::debayer10P_GBGB_BGR888;\n>> +            return 0;\n>> +        default:\n>> +            break;\n>> +        }\n>> +    }\n>> +\n>> +    LOG(Debayer, Error) << \"Unsupported input output format combination\";\n>> +    return -EINVAL;\n>> +}\n>> +\n>> +int DebayerCpu::configure(const StreamConfiguration &inputCfg,\n>> +              const std::vector<std::reference_wrapper<StreamConfiguration>> &outputCfgs)\n>> +{\n>> +    if (getInputConfig(inputCfg.pixelFormat, inputConfig_) != 0)\n>> +        return -EINVAL;\n>> +\n>> +    if (stats_->configure(inputCfg) != 0)\n>> +        return -EINVAL;\n>> +\n>> +    const Size &stats_pattern_size = stats_->patternSize();\n>> +    if (inputConfig_.patternSize.width != stats_pattern_size.width ||\n>> +        inputConfig_.patternSize.height != stats_pattern_size.height) {\n>> +        LOG(Debayer, Error)\n>> +            << \"mismatching stats and debayer pattern sizes for \"\n>> +            << inputCfg.pixelFormat.toString();\n>> +        return -EINVAL;\n>> +    }\n>> +\n>> +    inputConfig_.stride = inputCfg.stride;\n>> +\n>> +    if (outputCfgs.size() != 1) {\n>> +        LOG(Debayer, Error)\n>> +            << \"Unsupported number of output streams: \"\n>> +            << outputCfgs.size();\n>> +        return -EINVAL;\n>> +    }\n>> +\n>> +    const StreamConfiguration &outputCfg = outputCfgs[0];\n>> +    SizeRange outSizeRange = sizes(inputCfg.pixelFormat, inputCfg.size);\n>> +    std::tie(outputConfig_.stride, outputConfig_.frameSize) =\n>> +        strideAndFrameSize(outputCfg.pixelFormat, outputCfg.size);\n>> +\n>> +    if (!outSizeRange.contains(outputCfg.size) || outputConfig_.stride != outputCfg.stride) {\n>> +        LOG(Debayer, Error)\n>> +            << \"Invalid output size/stride: \"\n>> +            << \"\\n  \" << outputCfg.size << \" (\" << outSizeRange << \")\"\n>> +            << \"\\n  \" << outputCfg.stride << \" (\" << outputConfig_.stride << \")\";\n>> +        return -EINVAL;\n>> +    }\n>> +\n>> +    if (setDebayerFunctions(inputCfg.pixelFormat, outputCfg.pixelFormat) != 0)\n>> +        return -EINVAL;\n>> +\n>> +    window_.x = ((inputCfg.size.width - outputCfg.size.width) / 2) &\n>> +            ~(inputConfig_.patternSize.width - 1);\n>> +    window_.y = ((inputCfg.size.height - outputCfg.size.height) / 2) &\n>> +            ~(inputConfig_.patternSize.height - 1);\n>> +    window_.width = outputCfg.size.width;\n>> +    window_.height = outputCfg.size.height;\n>> +\n>> +    /* Don't pass x,y since process() already adjusts src before passing it */\n>> +    stats_->setWindow(Rectangle(window_.size()));\n>> +\n>> +    /* pad with patternSize.Width on both left and right side */\n>> +    lineBufferPadding_ = inputConfig_.patternSize.width * inputConfig_.bpp / 8;\n>> +    lineBufferLength_ = window_.width * inputConfig_.bpp / 8 +\n>> +                2 * lineBufferPadding_;\n>> +    for (unsigned int i = 0;\n>> +         i < (inputConfig_.patternSize.height + 1) && enableInputMemcpy_;\n>> +         i++) {\n>> +        free(lineBuffers_[i]);\n>> +        lineBuffers_[i] = (uint8_t *)malloc(lineBufferLength_);\n>> +        if (!lineBuffers_[i])\n>> +            return -ENOMEM;\n>> +    }\n>> +\n>> +    measuredFrames_ = 0;\n>> +    frameProcessTime_ = 0;\n>> +\n>> +    return 0;\n>> +}\n>> +\n>> +/*\n>> + * Get width and height at which the bayer-pattern repeats.\n>> + * Return pattern-size or an empty Size for an unsupported inputFormat.\n>> + */\n>> +Size DebayerCpu::patternSize(PixelFormat inputFormat)\n>> +{\n>> +    DebayerCpu::DebayerInputConfig config;\n>> +\n>> +    if (getInputConfig(inputFormat, config) != 0)\n>> +        return {};\n>> +\n>> +    return config.patternSize;\n>> +}\n>> +\n>> +std::vector<PixelFormat> DebayerCpu::formats(PixelFormat inputFormat)\n>> +{\n>> +    DebayerCpu::DebayerInputConfig config;\n>> +\n>> +    if (getInputConfig(inputFormat, config) != 0)\n>> +        return std::vector<PixelFormat>();\n>> +\n>> +    return config.outputFormats;\n>> +}\n>> +\n>> +std::tuple<unsigned int, unsigned int>\n>> +DebayerCpu::strideAndFrameSize(const PixelFormat &outputFormat, const Size &size)\n>> +{\n>> +    DebayerCpu::DebayerOutputConfig config;\n>> +\n>> +    if (getOutputConfig(outputFormat, config) != 0)\n>> +        return std::make_tuple(0, 0);\n>> +\n>> +    /* round up to multiple of 8 for 64 bits alignment */\n>> +    unsigned int stride = (size.width * config.bpp / 8 + 7) & ~7;\n>> +\n>> +    return std::make_tuple(stride, stride * size.height);\n>> +}\n>> +\n>> +void DebayerCpu::setupInputMemcpy(const uint8_t *linePointers[])\n>> +{\n>> +    const unsigned int patternHeight = inputConfig_.patternSize.height;\n>> +\n>> +    if (!enableInputMemcpy_)\n>> +        return;\n>> +\n>> +    for (unsigned int i = 0; i < patternHeight; i++) {\n>> +        memcpy(lineBuffers_[i], linePointers[i + 1] - lineBufferPadding_,\n>> +               lineBufferLength_);\n>> +        linePointers[i + 1] = lineBuffers_[i] + lineBufferPadding_;\n>> +    }\n>> +\n>> +    /* Point lineBufferIndex_ to first unused lineBuffer */\n>> +    lineBufferIndex_ = patternHeight;\n>> +}\n>> +\n>> +void DebayerCpu::shiftLinePointers(const uint8_t *linePointers[], const uint8_t *src)\n>> +{\n>> +    const unsigned int patternHeight = inputConfig_.patternSize.height;\n>> +\n>> +    for (unsigned int i = 0; i < patternHeight; i++)\n>> +        linePointers[i] = linePointers[i + 1];\n>> +\n>> +    linePointers[patternHeight] = src +\n>> +                      (patternHeight / 2) * (int)inputConfig_.stride;\n>> +}\n>> +\n>> +void DebayerCpu::memcpyNextLine(const uint8_t *linePointers[])\n>> +{\n>> +    const unsigned int patternHeight = inputConfig_.patternSize.height;\n>> +\n>> +    if (!enableInputMemcpy_)\n>> +        return;\n>> +\n>> +    memcpy(lineBuffers_[lineBufferIndex_], linePointers[patternHeight] - lineBufferPadding_,\n>> +           lineBufferLength_);\n>> +    linePointers[patternHeight] = lineBuffers_[lineBufferIndex_] + lineBufferPadding_;\n>> +\n>> +    lineBufferIndex_ = (lineBufferIndex_ + 1) % (patternHeight + 1);\n>> +}\n>> +\n>> +void DebayerCpu::process2(const uint8_t *src, uint8_t *dst)\n>> +{\n>> +    unsigned int y_end = window_.y + window_.height;\n>> +    /* Holds [0] previous- [1] current- [2] next-line */\n>> +    const uint8_t *linePointers[3];\n>> +\n>> +    /* Adjust src to top left corner of the window */\n>> +    src += window_.y * inputConfig_.stride + window_.x * inputConfig_.bpp / 8;\n>> +\n>> +    /* [x] becomes [x - 1] after initial shiftLinePointers() call */\n>> +    if (window_.y) {\n>> +        linePointers[1] = src - inputConfig_.stride; /* previous-line */\n>> +        linePointers[2] = src;\n>> +    } else {\n>> +        /* window_.y == 0, use the next line as prev line */\n>> +        linePointers[1] = src + inputConfig_.stride;\n>> +        linePointers[2] = src;\n>> +        /* Last 2 lines also need special handling */\n>> +        y_end -= 2;\n>> +    }\n>> +\n>> +    setupInputMemcpy(linePointers);\n>> +\n>> +    for (unsigned int y = window_.y; y < y_end; y += 2) {\n>> +        shiftLinePointers(linePointers, src);\n>> +        memcpyNextLine(linePointers);\n>> +        stats_->processLine0(y, linePointers);\n>> +        (this->*debayer0_)(dst, linePointers);\n>> +        src += inputConfig_.stride;\n>> +        dst += outputConfig_.stride;\n>> +\n>> +        shiftLinePointers(linePointers, src);\n>> +        memcpyNextLine(linePointers);\n>> +        (this->*debayer1_)(dst, linePointers);\n>> +        src += inputConfig_.stride;\n>> +        dst += outputConfig_.stride;\n>> +    }\n>> +\n>> +    if (window_.y == 0) {\n>> +        shiftLinePointers(linePointers, src);\n>> +        memcpyNextLine(linePointers);\n>> +        stats_->processLine0(y_end, linePointers);\n>> +        (this->*debayer0_)(dst, linePointers);\n>> +        src += inputConfig_.stride;\n>> +        dst += outputConfig_.stride;\n>> +\n>> +        shiftLinePointers(linePointers, src);\n>> +        /* next line may point outside of src, use prev. */\n>> +        linePointers[2] = linePointers[0];\n>> +        (this->*debayer1_)(dst, linePointers);\n>> +        src += inputConfig_.stride;\n>> +        dst += outputConfig_.stride;\n>> +    }\n>> +}\n>> +\n>> +void DebayerCpu::process4(const uint8_t *src, uint8_t *dst)\n>> +{\n>> +    const unsigned int y_end = window_.y + window_.height;\n>> +    /*\n>> +     * This holds pointers to [0] 2-lines-up [1] 1-line-up [2] current-line\n>> +     * [3] 1-line-down [4] 2-lines-down.\n>> +     */\n>> +    const uint8_t *linePointers[5];\n>> +\n>> +    /* Adjust src to top left corner of the window */\n>> +    src += window_.y * inputConfig_.stride + window_.x * inputConfig_.bpp / 8;\n>> +\n>> +    /* [x] becomes [x - 1] after initial shiftLinePointers() call */\n>> +    linePointers[1] = src - 2 * inputConfig_.stride;\n>> +    linePointers[2] = src - inputConfig_.stride;\n>> +    linePointers[3] = src;\n>> +    linePointers[4] = src + inputConfig_.stride;\n>> +\n>> +    setupInputMemcpy(linePointers);\n>> +\n>> +    for (unsigned int y = window_.y; y < y_end; y += 4) {\n>> +        shiftLinePointers(linePointers, src);\n>> +        memcpyNextLine(linePointers);\n>> +        stats_->processLine0(y, linePointers);\n>> +        (this->*debayer0_)(dst, linePointers);\n>> +        src += inputConfig_.stride;\n>> +        dst += outputConfig_.stride;\n>> +\n>> +        shiftLinePointers(linePointers, src);\n>> +        memcpyNextLine(linePointers);\n>> +        (this->*debayer1_)(dst, linePointers);\n>> +        src += inputConfig_.stride;\n>> +        dst += outputConfig_.stride;\n>> +\n>> +        shiftLinePointers(linePointers, src);\n>> +        memcpyNextLine(linePointers);\n>> +        stats_->processLine2(y, linePointers);\n>> +        (this->*debayer2_)(dst, linePointers);\n>> +        src += inputConfig_.stride;\n>> +        dst += outputConfig_.stride;\n>> +\n>> +        shiftLinePointers(linePointers, src);\n>> +        memcpyNextLine(linePointers);\n>> +        (this->*debayer3_)(dst, linePointers);\n>> +        src += inputConfig_.stride;\n>> +        dst += outputConfig_.stride;\n>> +    }\n>> +}\n>> +\n>> +static inline int64_t timeDiff(timespec &after, timespec &before)\n>> +{\n>> +    return (after.tv_sec - before.tv_sec) * 1000000000LL +\n>> +           (int64_t)after.tv_nsec - (int64_t)before.tv_nsec;\n>> +}\n>> +\n>> +void DebayerCpu::process(FrameBuffer *input, FrameBuffer *output, DebayerParams params)\n>> +{\n>> +    timespec frameStartTime;\n>> +\n>> +    if (measuredFrames_ < DebayerCpu::kLastFrameToMeasure) {\n>> +        frameStartTime = {};\n>> +        clock_gettime(CLOCK_MONOTONIC_RAW, &frameStartTime);\n>> +    }\n>> +\n>> +    /* Apply DebayerParams */\n>> +    if (params.gamma != gamma_correction_) {\n>> +        for (unsigned int i = 0; i < kGammaLookupSize; i++)\n>> +            gamma_[i] = UINT8_MAX * powf(i / (kGammaLookupSize - 1.0), params.gamma);\n>> +\n>> +        gamma_correction_ = params.gamma;\n>> +    }\n>> +\n>> +    for (unsigned int i = 0; i < kRGBLookupSize; i++) {\n>> +        constexpr unsigned int div =\n>> +            kRGBLookupSize * DebayerParams::kGain10 / kGammaLookupSize;\n>> +        unsigned int idx;\n>> +\n>> +        /* Apply gamma after gain! */\n>> +        idx = std::min({ i * params.gainR / div, (kGammaLookupSize - 1) });\n>> +        red_[i] = gamma_[idx];\n>> +\n>> +        idx = std::min({ i * params.gainG / div, (kGammaLookupSize - 1) });\n>> +        green_[i] = gamma_[idx];\n>> +\n>> +        idx = std::min({ i * params.gainB / div, (kGammaLookupSize - 1) });\n>> +        blue_[i] = gamma_[idx];\n>> +    }\n>> +\n>> +    /* Copy metadata from the input buffer */\n>> +    FrameMetadata &metadata = output->_d()->metadata();\n>> +    metadata.status = input->metadata().status;\n>> +    metadata.sequence = input->metadata().sequence;\n>> +    metadata.timestamp = input->metadata().timestamp;\n>> +\n>> +    MappedFrameBuffer in(input, MappedFrameBuffer::MapFlag::Read);\n>> +    MappedFrameBuffer out(output, MappedFrameBuffer::MapFlag::Write);\n>> +    if (!in.isValid() || !out.isValid()) {\n>> +        LOG(Debayer, Error) << \"mmap-ing buffer(s) failed\";\n>> +        metadata.status = FrameMetadata::FrameError;\n>> +        return;\n>> +    }\n>> +\n>> +    stats_->startFrame();\n>> +\n>> +    if (inputConfig_.patternSize.height == 2)\n>> +        process2(in.planes()[0].data(), out.planes()[0].data());\n>> +    else\n>> +        process4(in.planes()[0].data(), out.planes()[0].data());\n>> +\n>> +    metadata.planes()[0].bytesused = out.planes()[0].size();\n>> +\n>> +    /* Measure before emitting signals */\n>> +    if (measuredFrames_ < DebayerCpu::kLastFrameToMeasure &&\n>> +        ++measuredFrames_ > DebayerCpu::kFramesToSkip) {\n>> +        timespec frameEndTime = {};\n>> +        clock_gettime(CLOCK_MONOTONIC_RAW, &frameEndTime);\n>> +        frameProcessTime_ += timeDiff(frameEndTime, frameStartTime);\n>> +        if (measuredFrames_ == DebayerCpu::kLastFrameToMeasure) {\n>> +            const unsigned int measuredFrames = DebayerCpu::kLastFrameToMeasure -\n>> +                                DebayerCpu::kFramesToSkip;\n>> +            LOG(Debayer, Info)\n>> +                << \"Processed \" << measuredFrames\n>> +                << \" frames in \" << frameProcessTime_ / 1000 << \"us, \"\n>> +                << frameProcessTime_ / (1000 * measuredFrames)\n>> +                << \" us/frame\";\n>> +        }\n>> +    }\n>> +\n>> +    stats_->finishFrame();\n>> +    outputBufferReady.emit(output);\n>> +    inputBufferReady.emit(input);\n>> +}\n>> +\n>> +SizeRange DebayerCpu::sizes(PixelFormat inputFormat, const Size &inputSize)\n>> +{\n>> +    Size pattern_size = patternSize(inputFormat);\n>> +    unsigned int border_height = pattern_size.height;\n>> +\n>> +    if (pattern_size.isNull())\n>> +        return {};\n>> +\n>> +    /* No need for top/bottom border with a pattern height of 2 */\n>> +    if (pattern_size.height == 2)\n>> +        border_height = 0;\n>> +\n>> +    /*\n>> +     * For debayer interpolation a border is kept around the entire image\n>> +     * and the minimum output size is pattern-height x pattern-width.\n>> +     */\n>> +    if (inputSize.width < (3 * pattern_size.width) ||\n>> +        inputSize.height < (2 * border_height + pattern_size.height)) {\n>> +        LOG(Debayer, Warning)\n>> +            << \"Input format size too small: \" << inputSize.toString();\n>> +        return {};\n>> +    }\n>> +\n>> +    return SizeRange(Size(pattern_size.width, pattern_size.height),\n>> +             Size((inputSize.width - 2 * pattern_size.width) & ~(pattern_size.width - 1),\n>> +                  (inputSize.height - 2 * border_height) & ~(pattern_size.height - 1)),\n>> +             pattern_size.width, pattern_size.height);\n>> +}\n>> +\n>> +} /* namespace libcamera */\n>> diff --git a/src/libcamera/software_isp/debayer_cpu.h b/src/libcamera/software_isp/debayer_cpu.h\n>> new file mode 100644\n>> index 00000000..e0c4b9a8\n>> --- /dev/null\n>> +++ b/src/libcamera/software_isp/debayer_cpu.h\n>> @@ -0,0 +1,143 @@\n>> +/* SPDX-License-Identifier: LGPL-2.1-or-later */\n>> +/*\n>> + * Copyright (C) 2023, Linaro Ltd\n>> + * Copyright (C) 2023, Red Hat Inc.\n>> + *\n>> + * Authors:\n>> + * Hans de Goede <hdegoede@redhat.com>\n>> + *\n>> + * debayer_cpu.h - CPU based debayering header\n>> + */\n>> +\n>> +#pragma once\n>> +\n>> +#include <memory>\n>> +#include <stdint.h>\n>> +#include <vector>\n>> +\n>> +#include <libcamera/base/object.h>\n>> +\n>> +#include \"debayer.h\"\n>> +#include \"swstats_cpu.h\"\n>> +\n>> +namespace libcamera {\n>> +\n>> +class DebayerCpu : public Debayer, public Object\n>> +{\n>> +public:\n>> +    DebayerCpu(std::unique_ptr<SwStatsCpu> stats);\n>> +    ~DebayerCpu();\n>> +\n>> +    int configure(const StreamConfiguration &inputCfg,\n>> +              const std::vector<std::reference_wrapper<StreamConfiguration>> &outputCfgs);\n>> +    Size patternSize(PixelFormat inputFormat);\n>> +    std::vector<PixelFormat> formats(PixelFormat input);\n>> +    std::tuple<unsigned int, unsigned int>\n>> +    strideAndFrameSize(const PixelFormat &outputFormat, const Size &size);\n>> +    void process(FrameBuffer *input, FrameBuffer *output, DebayerParams params);\n>> +    SizeRange sizes(PixelFormat inputFormat, const Size &inputSize);\n>> +\n>> +    /**\n>> +     * \\brief Get the file descriptor for the statistics.\n>> +     *\n>> +     * \\return the file descriptor pointing to the statistics.\n>> +     */\n>> +    const SharedFD &getStatsFD() { return stats_->getStatsFD(); }\n>> +\n>> +    /**\n>> +     * \\brief Get the output frame size.\n>> +     *\n>> +     * \\return The output frame size.\n>> +     */\n>> +    unsigned int frameSize() { return outputConfig_.frameSize; }\n>> +\n>> +private:\n>> +    /**\n>> +     * \\brief Called to debayer 1 line of Bayer input data to output format\n>> +     * \\param[out] dst Pointer to the start of the output line to write\n>> +     * \\param[in] src The input data\n>> +     *\n>> +     * Input data is an array of (patternSize_.height + 1) src\n>> +     * pointers each pointing to a line in the Bayer source. The middle\n>> +     * element of the array will point to the actual line being processed.\n>> +     * Earlier element(s) will point to the previous line(s) and later\n>> +     * element(s) to the next line(s).\n>> +     *\n>> +     * These functions take an array of src pointers, rather then\n>> +     * a single src pointer + a stride for the source, so that when the src\n>> +     * is slow uncached memory it can be copied to faster memory before\n>> +     * debayering. Debayering a standard 2x2 Bayer pattern requires access\n>> +     * to the previous and next src lines for interpolating the missing\n>> +     * colors. To allow copying the src lines only once 3 buffers each\n>> +     * holding a single line are used, re-using the oldest buffer for\n>> +     * the next line and the pointers are swizzled so that:\n>> +     * src[0] = previous-line, src[1] = currrent-line, src[2] = next-line.\n>> +     * This way the 3 pointers passed to the debayer functions form\n>> +     * a sliding window over the src avoiding the need to copy each\n>> +     * line more then once.\n>> +     *\n>> +     * Similarly for bayer patterns which repeat every 4 lines, 5 src\n>> +     * pointers are passed holding: src[0] = 2-lines-up, src[1] = 1-line-up\n>> +     * src[2] = current-line, src[3] = 1-line-down, src[4] = 2-lines-down.\n>> +     */\n>> +    typedef void (DebayerCpu::*debayerFn)(uint8_t *dst, const uint8_t *src[]);\n>> +\n>> +    /* CSI-2 packed 10-bit raw bayer format (all the 4 orders) */\n>> +    void debayer10P_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]);\n>> +    void debayer10P_GRGR_BGR888(uint8_t *dst, const uint8_t *src[]);\n>> +    void debayer10P_GBGB_BGR888(uint8_t *dst, const uint8_t *src[]);\n>> +    void debayer10P_RGRG_BGR888(uint8_t *dst, const uint8_t *src[]);\n>> +\n>> +    struct DebayerInputConfig {\n>> +        Size patternSize;\n>> +        unsigned int bpp; /* Memory used per pixel, not precision */\n>> +        unsigned int stride;\n>> +        std::vector<PixelFormat> outputFormats;\n>> +    };\n>> +\n>> +    struct DebayerOutputConfig {\n>> +        unsigned int bpp; /* Memory used per pixel, not precision */\n>> +        unsigned int stride;\n>> +        unsigned int frameSize;\n>> +    };\n>> +\n>> +    int getInputConfig(PixelFormat inputFormat, DebayerInputConfig &config);\n>> +    int getOutputConfig(PixelFormat outputFormat, DebayerOutputConfig &config);\n>> +    int setDebayerFunctions(PixelFormat inputFormat, PixelFormat outputFormat);\n>> +    void setupInputMemcpy(const uint8_t *linePointers[]);\n>> +    void shiftLinePointers(const uint8_t *linePointers[], const uint8_t *src);\n>> +    void memcpyNextLine(const uint8_t *linePointers[]);\n>> +    void process2(const uint8_t *src, uint8_t *dst);\n>> +    void process4(const uint8_t *src, uint8_t *dst);\n>> +\n>> +    static constexpr unsigned int kGammaLookupSize = 1024;\n>> +    static constexpr unsigned int kRGBLookupSize = 256;\n>> +    /* Max. supported Bayer pattern height is 4, debayering this requires 5 lines */\n>> +    static constexpr unsigned int kMaxLineBuffers = 5;\n>> +\n>> +    std::array<uint8_t, kGammaLookupSize> gamma_;\n>> +    std::array<uint8_t, kRGBLookupSize> red_;\n>> +    std::array<uint8_t, kRGBLookupSize> green_;\n>> +    std::array<uint8_t, kRGBLookupSize> blue_;\n>> +    debayerFn debayer0_;\n>> +    debayerFn debayer1_;\n>> +    debayerFn debayer2_;\n>> +    debayerFn debayer3_;\n>> +    Rectangle window_;\n>> +    DebayerInputConfig inputConfig_;\n>> +    DebayerOutputConfig outputConfig_;\n>> +    std::unique_ptr<SwStatsCpu> stats_;\n>> +    uint8_t *lineBuffers_[kMaxLineBuffers];\n>> +    unsigned int lineBufferLength_;\n>> +    unsigned int lineBufferPadding_;\n>> +    unsigned int lineBufferIndex_;\n>> +    bool enableInputMemcpy_;\n>> +    float gamma_correction_;\n>> +    unsigned int measuredFrames_;\n>> +    int64_t frameProcessTime_;\n>> +    /* Skip 30 frames for things to stabilize then measure 30 frames */\n>> +    static constexpr unsigned int kFramesToSkip = 30;\n>> +    static constexpr unsigned int kLastFrameToMeasure = 60;\n>> +};\n>> +\n>> +} /* namespace libcamera */\n>> diff --git a/src/libcamera/software_isp/meson.build b/src/libcamera/software_isp/meson.build\n>> index 92fc90f3..dd5e70c1 100644\n>> --- a/src/libcamera/software_isp/meson.build\n>> +++ b/src/libcamera/software_isp/meson.build\n>> @@ -2,5 +2,6 @@\n>>   libcamera_sources += files([\n>>       'debayer.cpp',\n>> +    'debayer_cpu.cpp',\n>>       'swstats_cpu.cpp',\n>>   ])\n>","headers":{"Return-Path":"<libcamera-devel-bounces@lists.libcamera.org>","X-Original-To":"parsemail@patchwork.libcamera.org","Delivered-To":"parsemail@patchwork.libcamera.org","Received":["from lancelot.ideasonboard.com (lancelot.ideasonboard.com\n\t[92.243.16.209])\n\tby patchwork.libcamera.org (Postfix) with ESMTPS id DB7EBBF415\n\tfor <parsemail@patchwork.libcamera.org>;\n\tTue, 20 Feb 2024 10:54:50 +0000 (UTC)","from lancelot.ideasonboard.com (localhost [IPv6:::1])\n\tby lancelot.ideasonboard.com (Postfix) with ESMTP id D44AB62813;\n\tTue, 20 Feb 2024 11:54:49 +0100 (CET)","from mail-ed1-x530.google.com (mail-ed1-x530.google.com\n\t[IPv6:2a00:1450:4864:20::530])\n\tby lancelot.ideasonboard.com (Postfix) with ESMTPS id 5C23C62801\n\tfor <libcamera-devel@lists.libcamera.org>;\n\tTue, 20 Feb 2024 11:54:48 +0100 (CET)","by mail-ed1-x530.google.com with SMTP id\n\t4fb4d7f45d1cf-563bb51c36eso5759264a12.2\n\tfor <libcamera-devel@lists.libcamera.org>;\n\tTue, 20 Feb 2024 02:54:48 -0800 (PST)","from [192.168.118.26] ([87.116.160.233])\n\tby smtp.gmail.com with ESMTPSA id\n\tst11-20020a170907c08b00b00a3e559aaff9sm2825457ejc.29.2024.02.20.02.54.46\n\t(version=TLS1_3 cipher=TLS_AES_128_GCM_SHA256 bits=128/128);\n\tTue, 20 Feb 2024 02:54:46 -0800 (PST)"],"Authentication-Results":"lancelot.ideasonboard.com; dkim=pass (2048-bit key;\n\tunprotected) header.d=gmail.com header.i=@gmail.com\n\theader.b=\"huNSWjDW\"; dkim-atps=neutral","DKIM-Signature":"v=1; a=rsa-sha256; c=relaxed/relaxed;\n\td=gmail.com; s=20230601; t=1708426488; x=1709031288;\n\tdarn=lists.libcamera.org; \n\th=content-transfer-encoding:in-reply-to:from:content-language\n\t:references:cc:to:subject:user-agent:mime-version:date:message-id\n\t:from:to:cc:subject:date:message-id:reply-to;\n\tbh=fD3+o6A2MFaLRHOC32N40BJXhJhCHFRpO/rAbNzvMSw=;\n\tb=huNSWjDWIG735esl154dmi1bMSMxuu4cG5N323exp5eAwdtVrbIoZ74lPDU5NeCWUe\n\tG+BtE1SnfOnTJAK/PoGStjn2zPZiZ8aSuOzwffl/zDVSdLCPEZoHSJTYcE/4zn+PjfLR\n\tkYz5hG7LCQiuXMUy+1gZCe0rTNTfU/fkzEUmctyHrxG0xl0y8mWwDvBWkNJh8bPKPZ6N\n\tt49lqKicBD0ZTg8Znw1G2QpD0bw5y4kWpLjglcCnhWUbQowx6Avz5cpLyDAdUKRr2Q75\n\tXfBRweCluJewXZ6KzbVDw3O4nc9nLu6u4xvMWKdwcb3gwOoad3gHK/t8cjcWX56ygzpA\n\trpqQ==","X-Google-DKIM-Signature":"v=1; a=rsa-sha256; c=relaxed/relaxed;\n\td=1e100.net; s=20230601; t=1708426488; x=1709031288;\n\th=content-transfer-encoding:in-reply-to:from:content-language\n\t:references:cc:to:subject:user-agent:mime-version:date:message-id\n\t:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to;\n\tbh=fD3+o6A2MFaLRHOC32N40BJXhJhCHFRpO/rAbNzvMSw=;\n\tb=pbUtrzMSmPUcGCm4w3embXGvTA2TZjik4h4DHGKUORh09q4gwR1EztYHXEWK+oZy5r\n\tt9Z51qHVzVsGrnsU5gnFPzy3ABXUggIl+qlPfwG0dbzlwqxQ05oRRY3JRSj6p7laDGDV\n\tYOr5CrX04d9++4NlV53UKLWZF/gYvwPSjBCCz7eSSth/ESqze6Mpl5Ow40GSw+AAqtGe\n\tBEJjZiPoKUWytOjTDsr11fM8hH2xf9d/alGGkDmXch7u0DfPYhvx+JfzW60Q3u6qjpWT\n\tLiFnryfge7alcz4IliOT679U7GVwcAiTkndq1e9+LGKH8U3N62MRwOkQ9Lf4XRDJODhq\n\tDKYA==","X-Forwarded-Encrypted":"i=1;\n\tAJvYcCUz95ORq3lATw2rIGnqHf/M9/lRzGByo45eCSRIi2h3uSlz7iMAoM89EKJET6mIvnTHSNf6Wfh69/XtAY8XWSLDroN5386cIsbdr8yFLanpARk88g==","X-Gm-Message-State":"AOJu0YwD6d8jLkilVDl8tVnPolzZRBlxlujNWk6hrCazKSh1RHlzYwKv\n\tCkUvHRhzNzTT4hnDX0AAxG5PEPj13UuW79ku4dZvxq69fuC5NgNOLNUOTPpD","X-Google-Smtp-Source":"AGHT+IGGYbaiyGzFZffBgSIPujWX1+B5H5HTZnfORtlbioF7q5eOXCbu4LxWbQwvoX1ihzuoS4XGSw==","X-Received":"by 2002:a17:906:27d4:b0:a3e:b0b4:a83b with SMTP id\n\tk20-20020a17090627d400b00a3eb0b4a83bmr3046143ejc.37.1708426487258; \n\tTue, 20 Feb 2024 02:54:47 -0800 (PST)","Message-ID":"<731614fe-d28f-4049-97ca-e7d637df893a@gmail.com>","Date":"Tue, 20 Feb 2024 13:54:45 +0300","MIME-Version":"1.0","User-Agent":"Mozilla Thunderbird","Subject":"Re: [PATCH v3 08/16] libcamera: software_isp: Add DebayerCpu class","To":"Stefan Klug <stefan.klug@ideasonboard.com>,\n\tHans de Goede <hdegoede@redhat.com>, libcamera-devel@lists.libcamera.org","References":"<20240214170122.60754-1-hdegoede@redhat.com>\n\t<20240214170122.60754-9-hdegoede@redhat.com>\n\t<659d6c77-e8f3-4d8a-abeb-95c791c42662@ideasonboard.com>","Content-Language":"en-US","From":"Andrei Konovalov <andrey.konovalov.ynk@gmail.com>","In-Reply-To":"<659d6c77-e8f3-4d8a-abeb-95c791c42662@ideasonboard.com>","Content-Type":"text/plain; charset=UTF-8; format=flowed","Content-Transfer-Encoding":"8bit","X-BeenThere":"libcamera-devel@lists.libcamera.org","X-Mailman-Version":"2.1.29","Precedence":"list","List-Id":"<libcamera-devel.lists.libcamera.org>","List-Unsubscribe":"<https://lists.libcamera.org/options/libcamera-devel>,\n\t<mailto:libcamera-devel-request@lists.libcamera.org?subject=unsubscribe>","List-Archive":"<https://lists.libcamera.org/pipermail/libcamera-devel/>","List-Post":"<mailto:libcamera-devel@lists.libcamera.org>","List-Help":"<mailto:libcamera-devel-request@lists.libcamera.org?subject=help>","List-Subscribe":"<https://lists.libcamera.org/listinfo/libcamera-devel>,\n\t<mailto:libcamera-devel-request@lists.libcamera.org?subject=subscribe>","Cc":"Bryan O'Donoghue <bryan.odonoghue@linaro.org>,\n\tMaxime Ripard <mripard@redhat.com>, Pavel Machek <pavel@ucw.cz>,\n\tDennis Bonke <admin@dennisbonke.com>","Errors-To":"libcamera-devel-bounces@lists.libcamera.org","Sender":"\"libcamera-devel\" <libcamera-devel-bounces@lists.libcamera.org>"}},{"id":28701,"web_url":"https://patchwork.libcamera.org/comment/28701/","msgid":"<dce98834-99ab-4941-aceb-28142ec998f9@ideasonboard.com>","date":"2024-02-20T12:26:14","subject":"Re: [PATCH v3 08/16] libcamera: software_isp: Add DebayerCpu class","submitter":{"id":184,"url":"https://patchwork.libcamera.org/api/people/184/","name":"Stefan Klug","email":"stefan.klug@ideasonboard.com"},"content":"Hi Andrei,\n\nAm 20.02.24 um 11:54 schrieb Andrei Konovalov:\n> Hi Stefan,\n> \n> On 19.02.2024 20:09, Stefan Klug wrote:\n>> Hi Hans,\n>>\n>> thanks for your work on the SoftISP.\n>>\n>> Am 14.02.24 um 18:01 schrieb Hans de Goede:\n>>> Add CPU based debayering implementation. This initial implementation\n>>> only supports debayering packed 10 bits per pixel bayer data in\n>>> the 4 standard bayer orders.\n>>>\n>>> Doxygen documentation by Dennis Bonke.\n>>>\n>>> Tested-by: Bryan O'Donoghue <bryan.odonoghue@linaro.org> # sc8280xp \n>>> Lenovo x13s\n>>> Tested-by: Pavel Machek <pavel@ucw.cz>\n>>> Reviewed-by: Pavel Machek <pavel@ucw.cz>\n>>> Co-developed-by: Dennis Bonke <admin@dennisbonke.com>\n>>> Signed-off-by: Dennis Bonke <admin@dennisbonke.com>\n>>> Co-developed-by: Andrey Konovalov <andrey.konovalov@linaro.org>\n>>> Signed-off-by: Andrey Konovalov <andrey.konovalov@linaro.org>\n>>> Co-developed-by: Pavel Machek <pavel@ucw.cz>\n>>> Signed-off-by: Pavel Machek <pavel@ucw.cz>\n>>> Signed-off-by: Hans de Goede <hdegoede@redhat.com>\n>>> ---\n>>> Changes in v3:\n>>> - Move debayer_cpu.h to src/libcamera/software_isp/\n>>> - Move documentation to .cpp file\n>>> - Document how/why an array of src pointers is passed to\n>>>    the debayer functions\n>>> ---\n>>>   src/libcamera/software_isp/debayer_cpu.cpp | 619 +++++++++++++++++++++\n>>>   src/libcamera/software_isp/debayer_cpu.h   | 143 +++++\n>>>   src/libcamera/software_isp/meson.build     |   1 +\n>>>   3 files changed, 763 insertions(+)\n>>>   create mode 100644 src/libcamera/software_isp/debayer_cpu.cpp\n>>>   create mode 100644 src/libcamera/software_isp/debayer_cpu.h\n>>>\n>>> diff --git a/src/libcamera/software_isp/debayer_cpu.cpp \n>>> b/src/libcamera/software_isp/debayer_cpu.cpp\n>>> new file mode 100644\n>>> index 00000000..53e90776\n>>> --- /dev/null\n>>> +++ b/src/libcamera/software_isp/debayer_cpu.cpp\n>>> @@ -0,0 +1,619 @@\n>>> +/* SPDX-License-Identifier: LGPL-2.1-or-later */\n>>> +/*\n>>> + * Copyright (C) 2023, Linaro Ltd\n>>> + * Copyright (C) 2023, Red Hat Inc.\n>>> + *\n>>> + * Authors:\n>>> + * Hans de Goede <hdegoede@redhat.com>\n>>> + *\n>>> + * debayer_cpu.cpp - CPU based debayering class\n>>> + */\n>>> +\n>>> +#include \"debayer_cpu.h\"\n>>> +\n>>> +#include <math.h>\n>>> +#include <stdlib.h>\n>>> +#include <time.h>\n>>> +\n>>> +#include <libcamera/formats.h>\n>>> +\n>>> +#include \"libcamera/internal/bayer_format.h\"\n>>> +#include \"libcamera/internal/framebuffer.h\"\n>>> +#include \"libcamera/internal/mapped_framebuffer.h\"\n>>> +\n>>> +namespace libcamera {\n>>> +\n>>> +/**\n>>> + * \\class DebayerCpu\n>>> + * \\brief Class for debayering on the CPU\n>>> + *\n>>> + * Implementation for CPU based debayering\n>>> + */\n>>> +\n>>> +/**\n>>> + * \\brief Constructs a DebayerCpu object.\n>>> + * \\param[in] stats Pointer to the stats object to use.\n>>> + */\n>>> +DebayerCpu::DebayerCpu(std::unique_ptr<SwStatsCpu> stats)\n>>\n>> I tried to use the ISP without statistics/regulation altogether and \n>> just set stats to a nullptr. This fails in a few places. IMHO it would \n>> improve flexibility & reusability to allow that.\n>> Attched is a patch with the modifications as I needed them anyways. \n>> Feel free to include them.\n> \n> Your patch itself looks OK for me (I haven't tested it though).\n> \n> But I am curious what is the reason for not using stats?\n> Leaving the debayer step only, one looses AWB (which doesn't need any \n> particular\n> support from the hardware and prevents the typical raw bayer green tint) \n> and AE/AGC (which only\n> needs at least one of the two camera sensor controls, and with wrong \n> exposure the\n> image may loose the information, and this can't be fully compensated by \n> post processing).\n\nI was working on the camera sensor implementation in libcamera and \nneeded to quickly display the debayered sensor image. No auto regulation \nshould happen as I wanted to manually control gain/exposure. One could \neven expose manual whitebalance gains for usecases under known \nconditions. Having the SoftIsp as a modular playground for such cases is \nquite helpful.\n\n> \n> Thanks,\n> Andrei\n> \n>> Cheers Stefan\n>>\n>>> +    : stats_(std::move(stats)), gamma_correction_(1.0)\n>>> +{\n>>> +#ifdef __x86_64__\n>>> +    enableInputMemcpy_ = false;\n>>> +#else\n>>> +    enableInputMemcpy_ = true;\n>>> +#endif\n>>> +    /* Initialize gamma to 1.0 curve */\n>>> +    for (unsigned int i = 0; i < kGammaLookupSize; i++)\n>>> +        gamma_[i] = i / 4;\n>>> +\n>>> +    for (unsigned int i = 0; i < kMaxLineBuffers; i++)\n>>> +        lineBuffers_[i] = nullptr;\n>>> +}\n>>> +\n>>> +DebayerCpu::~DebayerCpu()\n>>> +{\n>>> +    for (unsigned int i = 0; i < kMaxLineBuffers; i++)\n>>> +        free(lineBuffers_[i]);\n>>> +}\n>>> +\n>>> +// RGR\n>>> +// GBG\n>>> +// RGR\n>>> +#define BGGR_BGR888(p, n, \n>>> div)                                                                \\\n>>> +    *dst++ = blue_[curr[x] / \n>>> (div)];                                                      \\\n>>> +    *dst++ = green_[(prev[x] + curr[x - p] + curr[x + n] + next[x]) \n>>> / (4 * (div))];       \\\n>>> +    *dst++ = red_[(prev[x - p] + prev[x + n] + next[x - p] + next[x \n>>> + n]) / (4 * (div))]; \\\n>>> +    x++;\n>>> +\n>>> +// GBG\n>>> +// RGR\n>>> +// GBG\n>>> +#define GRBG_BGR888(p, n, div)                                    \\\n>>> +    *dst++ = blue_[(prev[x] + next[x]) / (2 * (div))];        \\\n>>> +    *dst++ = green_[curr[x] / (div)];                         \\\n>>> +    *dst++ = red_[(curr[x - p] + curr[x + n]) / (2 * (div))]; \\\n>>> +    x++;\n>>> +\n>>> +// GRG\n>>> +// BGB\n>>> +// GRG\n>>> +#define GBRG_BGR888(p, n, div)                                     \\\n>>> +    *dst++ = blue_[(curr[x - p] + curr[x + n]) / (2 * (div))]; \\\n>>> +    *dst++ = green_[curr[x] / (div)];                          \\\n>>> +    *dst++ = red_[(prev[x] + next[x]) / (2 * (div))];          \\\n>>> +    x++;\n>>> +\n>>> +// BGB\n>>> +// GRG\n>>> +// BGB\n>>> +#define RGGB_BGR888(p, n, \n>>> div)                                                                 \\\n>>> +    *dst++ = blue_[(prev[x - p] + prev[x + n] + next[x - p] + next[x \n>>> + n]) / (4 * (div))]; \\\n>>> +    *dst++ = green_[(prev[x] + curr[x - p] + curr[x + n] + next[x]) \n>>> / (4 * (div))];        \\\n>>> +    *dst++ = red_[curr[x] / \n>>> (div)];                                                        \\\n>>> +    x++;\n>>> +\n>>> +void DebayerCpu::debayer10P_BGBG_BGR888(uint8_t *dst, const uint8_t \n>>> *src[])\n>>> +{\n>>> +    const int width_in_bytes = window_.width * 5 / 4;\n>>> +    const uint8_t *prev = (const uint8_t *)src[0];\n>>> +    const uint8_t *curr = (const uint8_t *)src[1];\n>>> +    const uint8_t *next = (const uint8_t *)src[2];\n>>> +\n>>> +    /*\n>>> +     * For the first pixel getting a pixel from the previous column \n>>> uses\n>>> +     * x - 2 to skip the 5th byte with least-significant bits for 4 \n>>> pixels.\n>>> +     * Same for last pixel (uses x + 2) and looking at the next column.\n>>> +     * x++ in the for-loop skips the 5th byte with 4 x 2 lsb-s for \n>>> 10bit packed.\n>>> +     */\n>>> +    for (int x = 0; x < width_in_bytes; x++) {\n>>> +        /* First pixel */\n>>> +        BGGR_BGR888(2, 1, 1)\n>>> +        /* Second pixel BGGR -> GBRG */\n>>> +        GBRG_BGR888(1, 1, 1)\n>>> +        /* Same thing for third and fourth pixels */\n>>> +        BGGR_BGR888(1, 1, 1)\n>>> +        GBRG_BGR888(1, 2, 1)\n>>> +    }\n>>> +}\n>>> +\n>>> +void DebayerCpu::debayer10P_GRGR_BGR888(uint8_t *dst, const uint8_t \n>>> *src[])\n>>> +{\n>>> +    const int width_in_bytes = window_.width * 5 / 4;\n>>> +    const uint8_t *prev = (const uint8_t *)src[0];\n>>> +    const uint8_t *curr = (const uint8_t *)src[1];\n>>> +    const uint8_t *next = (const uint8_t *)src[2];\n>>> +\n>>> +    for (int x = 0; x < width_in_bytes; x++) {\n>>> +        /* First pixel */\n>>> +        GRBG_BGR888(2, 1, 1)\n>>> +        /* Second pixel GRBG -> RGGB */\n>>> +        RGGB_BGR888(1, 1, 1)\n>>> +        /* Same thing for third and fourth pixels */\n>>> +        GRBG_BGR888(1, 1, 1)\n>>> +        RGGB_BGR888(1, 2, 1)\n>>> +    }\n>>> +}\n>>> +\n>>> +void DebayerCpu::debayer10P_GBGB_BGR888(uint8_t *dst, const uint8_t \n>>> *src[])\n>>> +{\n>>> +    const int width_in_bytes = window_.width * 5 / 4;\n>>> +    const uint8_t *prev = (const uint8_t *)src[0];\n>>> +    const uint8_t *curr = (const uint8_t *)src[1];\n>>> +    const uint8_t *next = (const uint8_t *)src[2];\n>>> +\n>>> +    for (int x = 0; x < width_in_bytes; x++) {\n>>> +        /* Even pixel */\n>>> +        GBRG_BGR888(2, 1, 1)\n>>> +        /* Odd pixel GBGR -> BGGR */\n>>> +        BGGR_BGR888(1, 1, 1)\n>>> +        /* Same thing for next 2 pixels */\n>>> +        GBRG_BGR888(1, 1, 1)\n>>> +        BGGR_BGR888(1, 2, 1)\n>>> +    }\n>>> +}\n>>> +\n>>> +void DebayerCpu::debayer10P_RGRG_BGR888(uint8_t *dst, const uint8_t \n>>> *src[])\n>>> +{\n>>> +    const int width_in_bytes = window_.width * 5 / 4;\n>>> +    const uint8_t *prev = (const uint8_t *)src[0];\n>>> +    const uint8_t *curr = (const uint8_t *)src[1];\n>>> +    const uint8_t *next = (const uint8_t *)src[2];\n>>> +\n>>> +    for (int x = 0; x < width_in_bytes; x++) {\n>>> +        /* Even pixel */\n>>> +        RGGB_BGR888(2, 1, 1)\n>>> +        /* Odd pixel RGGB -> GRBG*/\n>>> +        GRBG_BGR888(1, 1, 1)\n>>> +        /* Same thing for next 2 pixels */\n>>> +        RGGB_BGR888(1, 1, 1)\n>>> +        GRBG_BGR888(1, 2, 1)\n>>> +    }\n>>> +}\n>>> +\n>>> +static bool isStandardBayerOrder(BayerFormat::Order order)\n>>> +{\n>>> +    return order == BayerFormat::BGGR || order == BayerFormat::GBRG ||\n>>> +           order == BayerFormat::GRBG || order == BayerFormat::RGGB;\n>>> +}\n>>> +\n>>> +/*\n>>> + * Setup the Debayer object according to the passed in parameters.\n>>> + * Return 0 on success, a negative errno value on failure\n>>> + * (unsupported parameters).\n>>> + */\n>>> +int DebayerCpu::getInputConfig(PixelFormat inputFormat, \n>>> DebayerInputConfig &config)\n>>> +{\n>>> +    BayerFormat bayerFormat =\n>>> +        BayerFormat::fromPixelFormat(inputFormat);\n>>> +\n>>> +    if (bayerFormat.bitDepth == 10 &&\n>>> +        bayerFormat.packing == BayerFormat::Packing::CSI2 &&\n>>> +        isStandardBayerOrder(bayerFormat.order)) {\n>>> +        config.bpp = 10;\n>>> +        config.patternSize.width = 4; /* 5 bytes per *4* pixels */\n>>> +        config.patternSize.height = 2;\n>>> +        config.outputFormats = std::vector<PixelFormat>({ \n>>> formats::RGB888 });\n>>> +        return 0;\n>>> +    }\n>>> +\n>>> +    LOG(Debayer, Info)\n>>> +        << \"Unsupported input format \" << inputFormat.toString();\n>>> +    return -EINVAL;\n>>> +}\n>>> +\n>>> +int DebayerCpu::getOutputConfig(PixelFormat outputFormat, \n>>> DebayerOutputConfig &config)\n>>> +{\n>>> +    if (outputFormat == formats::RGB888) {\n>>> +        config.bpp = 24;\n>>> +        return 0;\n>>> +    }\n>>> +\n>>> +    LOG(Debayer, Info)\n>>> +        << \"Unsupported output format \" << outputFormat.toString();\n>>> +    return -EINVAL;\n>>> +}\n>>> +\n>>> +/* TODO: this ignores outputFormat since there is only 1 supported \n>>> outputFormat for now */\n>>> +int DebayerCpu::setDebayerFunctions(PixelFormat inputFormat, \n>>> [[maybe_unused]] PixelFormat outputFormat)\n>>> +{\n>>> +    BayerFormat bayerFormat =\n>>> +        BayerFormat::fromPixelFormat(inputFormat);\n>>> +\n>>> +    if (bayerFormat.bitDepth == 10 &&\n>>> +        bayerFormat.packing == BayerFormat::Packing::CSI2) {\n>>> +        switch (bayerFormat.order) {\n>>> +        case BayerFormat::BGGR:\n>>> +            debayer0_ = &DebayerCpu::debayer10P_BGBG_BGR888;\n>>> +            debayer1_ = &DebayerCpu::debayer10P_GRGR_BGR888;\n>>> +            return 0;\n>>> +        case BayerFormat::GBRG:\n>>> +            debayer0_ = &DebayerCpu::debayer10P_GBGB_BGR888;\n>>> +            debayer1_ = &DebayerCpu::debayer10P_RGRG_BGR888;\n>>> +            return 0;\n>>> +        case BayerFormat::GRBG:\n>>> +            debayer0_ = &DebayerCpu::debayer10P_GRGR_BGR888;\n>>> +            debayer1_ = &DebayerCpu::debayer10P_BGBG_BGR888;\n>>> +            return 0;\n>>> +        case BayerFormat::RGGB:\n>>> +            debayer0_ = &DebayerCpu::debayer10P_RGRG_BGR888;\n>>> +            debayer1_ = &DebayerCpu::debayer10P_GBGB_BGR888;\n>>> +            return 0;\n>>> +        default:\n>>> +            break;\n>>> +        }\n>>> +    }\n>>> +\n>>> +    LOG(Debayer, Error) << \"Unsupported input output format \n>>> combination\";\n>>> +    return -EINVAL;\n>>> +}\n>>> +\n>>> +int DebayerCpu::configure(const StreamConfiguration &inputCfg,\n>>> +              const \n>>> std::vector<std::reference_wrapper<StreamConfiguration>> &outputCfgs)\n>>> +{\n>>> +    if (getInputConfig(inputCfg.pixelFormat, inputConfig_) != 0)\n>>> +        return -EINVAL;\n>>> +\n>>> +    if (stats_->configure(inputCfg) != 0)\n>>> +        return -EINVAL;\n>>> +\n>>> +    const Size &stats_pattern_size = stats_->patternSize();\n>>> +    if (inputConfig_.patternSize.width != stats_pattern_size.width ||\n>>> +        inputConfig_.patternSize.height != stats_pattern_size.height) {\n>>> +        LOG(Debayer, Error)\n>>> +            << \"mismatching stats and debayer pattern sizes for \"\n>>> +            << inputCfg.pixelFormat.toString();\n>>> +        return -EINVAL;\n>>> +    }\n>>> +\n>>> +    inputConfig_.stride = inputCfg.stride;\n>>> +\n>>> +    if (outputCfgs.size() != 1) {\n>>> +        LOG(Debayer, Error)\n>>> +            << \"Unsupported number of output streams: \"\n>>> +            << outputCfgs.size();\n>>> +        return -EINVAL;\n>>> +    }\n>>> +\n>>> +    const StreamConfiguration &outputCfg = outputCfgs[0];\n>>> +    SizeRange outSizeRange = sizes(inputCfg.pixelFormat, \n>>> inputCfg.size);\n>>> +    std::tie(outputConfig_.stride, outputConfig_.frameSize) =\n>>> +        strideAndFrameSize(outputCfg.pixelFormat, outputCfg.size);\n>>> +\n>>> +    if (!outSizeRange.contains(outputCfg.size) || \n>>> outputConfig_.stride != outputCfg.stride) {\n>>> +        LOG(Debayer, Error)\n>>> +            << \"Invalid output size/stride: \"\n>>> +            << \"\\n  \" << outputCfg.size << \" (\" << outSizeRange << \")\"\n>>> +            << \"\\n  \" << outputCfg.stride << \" (\" << \n>>> outputConfig_.stride << \")\";\n>>> +        return -EINVAL;\n>>> +    }\n>>> +\n>>> +    if (setDebayerFunctions(inputCfg.pixelFormat, \n>>> outputCfg.pixelFormat) != 0)\n>>> +        return -EINVAL;\n>>> +\n>>> +    window_.x = ((inputCfg.size.width - outputCfg.size.width) / 2) &\n>>> +            ~(inputConfig_.patternSize.width - 1);\n>>> +    window_.y = ((inputCfg.size.height - outputCfg.size.height) / 2) &\n>>> +            ~(inputConfig_.patternSize.height - 1);\n>>> +    window_.width = outputCfg.size.width;\n>>> +    window_.height = outputCfg.size.height;\n>>> +\n>>> +    /* Don't pass x,y since process() already adjusts src before \n>>> passing it */\n>>> +    stats_->setWindow(Rectangle(window_.size()));\n>>> +\n>>> +    /* pad with patternSize.Width on both left and right side */\n>>> +    lineBufferPadding_ = inputConfig_.patternSize.width * \n>>> inputConfig_.bpp / 8;\n>>> +    lineBufferLength_ = window_.width * inputConfig_.bpp / 8 +\n>>> +                2 * lineBufferPadding_;\n>>> +    for (unsigned int i = 0;\n>>> +         i < (inputConfig_.patternSize.height + 1) && \n>>> enableInputMemcpy_;\n>>> +         i++) {\n>>> +        free(lineBuffers_[i]);\n>>> +        lineBuffers_[i] = (uint8_t *)malloc(lineBufferLength_);\n>>> +        if (!lineBuffers_[i])\n>>> +            return -ENOMEM;\n>>> +    }\n>>> +\n>>> +    measuredFrames_ = 0;\n>>> +    frameProcessTime_ = 0;\n>>> +\n>>> +    return 0;\n>>> +}\n>>> +\n>>> +/*\n>>> + * Get width and height at which the bayer-pattern repeats.\n>>> + * Return pattern-size or an empty Size for an unsupported inputFormat.\n>>> + */\n>>> +Size DebayerCpu::patternSize(PixelFormat inputFormat)\n>>> +{\n>>> +    DebayerCpu::DebayerInputConfig config;\n>>> +\n>>> +    if (getInputConfig(inputFormat, config) != 0)\n>>> +        return {};\n>>> +\n>>> +    return config.patternSize;\n>>> +}\n>>> +\n>>> +std::vector<PixelFormat> DebayerCpu::formats(PixelFormat inputFormat)\n>>> +{\n>>> +    DebayerCpu::DebayerInputConfig config;\n>>> +\n>>> +    if (getInputConfig(inputFormat, config) != 0)\n>>> +        return std::vector<PixelFormat>();\n>>> +\n>>> +    return config.outputFormats;\n>>> +}\n>>> +\n>>> +std::tuple<unsigned int, unsigned int>\n>>> +DebayerCpu::strideAndFrameSize(const PixelFormat &outputFormat, \n>>> const Size &size)\n>>> +{\n>>> +    DebayerCpu::DebayerOutputConfig config;\n>>> +\n>>> +    if (getOutputConfig(outputFormat, config) != 0)\n>>> +        return std::make_tuple(0, 0);\n>>> +\n>>> +    /* round up to multiple of 8 for 64 bits alignment */\n>>> +    unsigned int stride = (size.width * config.bpp / 8 + 7) & ~7;\n>>> +\n>>> +    return std::make_tuple(stride, stride * size.height);\n>>> +}\n>>> +\n>>> +void DebayerCpu::setupInputMemcpy(const uint8_t *linePointers[])\n>>> +{\n>>> +    const unsigned int patternHeight = inputConfig_.patternSize.height;\n>>> +\n>>> +    if (!enableInputMemcpy_)\n>>> +        return;\n>>> +\n>>> +    for (unsigned int i = 0; i < patternHeight; i++) {\n>>> +        memcpy(lineBuffers_[i], linePointers[i + 1] - \n>>> lineBufferPadding_,\n>>> +               lineBufferLength_);\n>>> +        linePointers[i + 1] = lineBuffers_[i] + lineBufferPadding_;\n>>> +    }\n>>> +\n>>> +    /* Point lineBufferIndex_ to first unused lineBuffer */\n>>> +    lineBufferIndex_ = patternHeight;\n>>> +}\n>>> +\n>>> +void DebayerCpu::shiftLinePointers(const uint8_t *linePointers[], \n>>> const uint8_t *src)\n>>> +{\n>>> +    const unsigned int patternHeight = inputConfig_.patternSize.height;\n>>> +\n>>> +    for (unsigned int i = 0; i < patternHeight; i++)\n>>> +        linePointers[i] = linePointers[i + 1];\n>>> +\n>>> +    linePointers[patternHeight] = src +\n>>> +                      (patternHeight / 2) * (int)inputConfig_.stride;\n>>> +}\n>>> +\n>>> +void DebayerCpu::memcpyNextLine(const uint8_t *linePointers[])\n>>> +{\n>>> +    const unsigned int patternHeight = inputConfig_.patternSize.height;\n>>> +\n>>> +    if (!enableInputMemcpy_)\n>>> +        return;\n>>> +\n>>> +    memcpy(lineBuffers_[lineBufferIndex_], \n>>> linePointers[patternHeight] - lineBufferPadding_,\n>>> +           lineBufferLength_);\n>>> +    linePointers[patternHeight] = lineBuffers_[lineBufferIndex_] + \n>>> lineBufferPadding_;\n>>> +\n>>> +    lineBufferIndex_ = (lineBufferIndex_ + 1) % (patternHeight + 1);\n>>> +}\n>>> +\n>>> +void DebayerCpu::process2(const uint8_t *src, uint8_t *dst)\n>>> +{\n>>> +    unsigned int y_end = window_.y + window_.height;\n>>> +    /* Holds [0] previous- [1] current- [2] next-line */\n>>> +    const uint8_t *linePointers[3];\n>>> +\n>>> +    /* Adjust src to top left corner of the window */\n>>> +    src += window_.y * inputConfig_.stride + window_.x * \n>>> inputConfig_.bpp / 8;\n>>> +\n>>> +    /* [x] becomes [x - 1] after initial shiftLinePointers() call */\n>>> +    if (window_.y) {\n>>> +        linePointers[1] = src - inputConfig_.stride; /* \n>>> previous-line */\n>>> +        linePointers[2] = src;\n>>> +    } else {\n>>> +        /* window_.y == 0, use the next line as prev line */\n>>> +        linePointers[1] = src + inputConfig_.stride;\n>>> +        linePointers[2] = src;\n>>> +        /* Last 2 lines also need special handling */\n>>> +        y_end -= 2;\n>>> +    }\n>>> +\n>>> +    setupInputMemcpy(linePointers);\n>>> +\n>>> +    for (unsigned int y = window_.y; y < y_end; y += 2) {\n>>> +        shiftLinePointers(linePointers, src);\n>>> +        memcpyNextLine(linePointers);\n>>> +        stats_->processLine0(y, linePointers);\n>>> +        (this->*debayer0_)(dst, linePointers);\n>>> +        src += inputConfig_.stride;\n>>> +        dst += outputConfig_.stride;\n>>> +\n>>> +        shiftLinePointers(linePointers, src);\n>>> +        memcpyNextLine(linePointers);\n>>> +        (this->*debayer1_)(dst, linePointers);\n>>> +        src += inputConfig_.stride;\n>>> +        dst += outputConfig_.stride;\n>>> +    }\n>>> +\n>>> +    if (window_.y == 0) {\n>>> +        shiftLinePointers(linePointers, src);\n>>> +        memcpyNextLine(linePointers);\n>>> +        stats_->processLine0(y_end, linePointers);\n>>> +        (this->*debayer0_)(dst, linePointers);\n>>> +        src += inputConfig_.stride;\n>>> +        dst += outputConfig_.stride;\n>>> +\n>>> +        shiftLinePointers(linePointers, src);\n>>> +        /* next line may point outside of src, use prev. */\n>>> +        linePointers[2] = linePointers[0];\n>>> +        (this->*debayer1_)(dst, linePointers);\n>>> +        src += inputConfig_.stride;\n>>> +        dst += outputConfig_.stride;\n>>> +    }\n>>> +}\n>>> +\n>>> +void DebayerCpu::process4(const uint8_t *src, uint8_t *dst)\n>>> +{\n>>> +    const unsigned int y_end = window_.y + window_.height;\n>>> +    /*\n>>> +     * This holds pointers to [0] 2-lines-up [1] 1-line-up [2] \n>>> current-line\n>>> +     * [3] 1-line-down [4] 2-lines-down.\n>>> +     */\n>>> +    const uint8_t *linePointers[5];\n>>> +\n>>> +    /* Adjust src to top left corner of the window */\n>>> +    src += window_.y * inputConfig_.stride + window_.x * \n>>> inputConfig_.bpp / 8;\n>>> +\n>>> +    /* [x] becomes [x - 1] after initial shiftLinePointers() call */\n>>> +    linePointers[1] = src - 2 * inputConfig_.stride;\n>>> +    linePointers[2] = src - inputConfig_.stride;\n>>> +    linePointers[3] = src;\n>>> +    linePointers[4] = src + inputConfig_.stride;\n>>> +\n>>> +    setupInputMemcpy(linePointers);\n>>> +\n>>> +    for (unsigned int y = window_.y; y < y_end; y += 4) {\n>>> +        shiftLinePointers(linePointers, src);\n>>> +        memcpyNextLine(linePointers);\n>>> +        stats_->processLine0(y, linePointers);\n>>> +        (this->*debayer0_)(dst, linePointers);\n>>> +        src += inputConfig_.stride;\n>>> +        dst += outputConfig_.stride;\n>>> +\n>>> +        shiftLinePointers(linePointers, src);\n>>> +        memcpyNextLine(linePointers);\n>>> +        (this->*debayer1_)(dst, linePointers);\n>>> +        src += inputConfig_.stride;\n>>> +        dst += outputConfig_.stride;\n>>> +\n>>> +        shiftLinePointers(linePointers, src);\n>>> +        memcpyNextLine(linePointers);\n>>> +        stats_->processLine2(y, linePointers);\n>>> +        (this->*debayer2_)(dst, linePointers);\n>>> +        src += inputConfig_.stride;\n>>> +        dst += outputConfig_.stride;\n>>> +\n>>> +        shiftLinePointers(linePointers, src);\n>>> +        memcpyNextLine(linePointers);\n>>> +        (this->*debayer3_)(dst, linePointers);\n>>> +        src += inputConfig_.stride;\n>>> +        dst += outputConfig_.stride;\n>>> +    }\n>>> +}\n>>> +\n>>> +static inline int64_t timeDiff(timespec &after, timespec &before)\n>>> +{\n>>> +    return (after.tv_sec - before.tv_sec) * 1000000000LL +\n>>> +           (int64_t)after.tv_nsec - (int64_t)before.tv_nsec;\n>>> +}\n>>> +\n>>> +void DebayerCpu::process(FrameBuffer *input, FrameBuffer *output, \n>>> DebayerParams params)\n>>> +{\n>>> +    timespec frameStartTime;\n>>> +\n>>> +    if (measuredFrames_ < DebayerCpu::kLastFrameToMeasure) {\n>>> +        frameStartTime = {};\n>>> +        clock_gettime(CLOCK_MONOTONIC_RAW, &frameStartTime);\n>>> +    }\n>>> +\n>>> +    /* Apply DebayerParams */\n>>> +    if (params.gamma != gamma_correction_) {\n>>> +        for (unsigned int i = 0; i < kGammaLookupSize; i++)\n>>> +            gamma_[i] = UINT8_MAX * powf(i / (kGammaLookupSize - \n>>> 1.0), params.gamma);\n>>> +\n>>> +        gamma_correction_ = params.gamma;\n>>> +    }\n>>> +\n>>> +    for (unsigned int i = 0; i < kRGBLookupSize; i++) {\n>>> +        constexpr unsigned int div =\n>>> +            kRGBLookupSize * DebayerParams::kGain10 / kGammaLookupSize;\n>>> +        unsigned int idx;\n>>> +\n>>> +        /* Apply gamma after gain! */\n>>> +        idx = std::min({ i * params.gainR / div, (kGammaLookupSize - \n>>> 1) });\n>>> +        red_[i] = gamma_[idx];\n>>> +\n>>> +        idx = std::min({ i * params.gainG / div, (kGammaLookupSize - \n>>> 1) });\n>>> +        green_[i] = gamma_[idx];\n>>> +\n>>> +        idx = std::min({ i * params.gainB / div, (kGammaLookupSize - \n>>> 1) });\n>>> +        blue_[i] = gamma_[idx];\n>>> +    }\n>>> +\n>>> +    /* Copy metadata from the input buffer */\n>>> +    FrameMetadata &metadata = output->_d()->metadata();\n>>> +    metadata.status = input->metadata().status;\n>>> +    metadata.sequence = input->metadata().sequence;\n>>> +    metadata.timestamp = input->metadata().timestamp;\n>>> +\n>>> +    MappedFrameBuffer in(input, MappedFrameBuffer::MapFlag::Read);\n>>> +    MappedFrameBuffer out(output, MappedFrameBuffer::MapFlag::Write);\n>>> +    if (!in.isValid() || !out.isValid()) {\n>>> +        LOG(Debayer, Error) << \"mmap-ing buffer(s) failed\";\n>>> +        metadata.status = FrameMetadata::FrameError;\n>>> +        return;\n>>> +    }\n>>> +\n>>> +    stats_->startFrame();\n>>> +\n>>> +    if (inputConfig_.patternSize.height == 2)\n>>> +        process2(in.planes()[0].data(), out.planes()[0].data());\n>>> +    else\n>>> +        process4(in.planes()[0].data(), out.planes()[0].data());\n>>> +\n>>> +    metadata.planes()[0].bytesused = out.planes()[0].size();\n>>> +\n>>> +    /* Measure before emitting signals */\n>>> +    if (measuredFrames_ < DebayerCpu::kLastFrameToMeasure &&\n>>> +        ++measuredFrames_ > DebayerCpu::kFramesToSkip) {\n>>> +        timespec frameEndTime = {};\n>>> +        clock_gettime(CLOCK_MONOTONIC_RAW, &frameEndTime);\n>>> +        frameProcessTime_ += timeDiff(frameEndTime, frameStartTime);\n>>> +        if (measuredFrames_ == DebayerCpu::kLastFrameToMeasure) {\n>>> +            const unsigned int measuredFrames = \n>>> DebayerCpu::kLastFrameToMeasure -\n>>> +                                DebayerCpu::kFramesToSkip;\n>>> +            LOG(Debayer, Info)\n>>> +                << \"Processed \" << measuredFrames\n>>> +                << \" frames in \" << frameProcessTime_ / 1000 << \"us, \"\n>>> +                << frameProcessTime_ / (1000 * measuredFrames)\n>>> +                << \" us/frame\";\n>>> +        }\n>>> +    }\n>>> +\n>>> +    stats_->finishFrame();\n>>> +    outputBufferReady.emit(output);\n>>> +    inputBufferReady.emit(input);\n>>> +}\n>>> +\n>>> +SizeRange DebayerCpu::sizes(PixelFormat inputFormat, const Size \n>>> &inputSize)\n>>> +{\n>>> +    Size pattern_size = patternSize(inputFormat);\n>>> +    unsigned int border_height = pattern_size.height;\n>>> +\n>>> +    if (pattern_size.isNull())\n>>> +        return {};\n>>> +\n>>> +    /* No need for top/bottom border with a pattern height of 2 */\n>>> +    if (pattern_size.height == 2)\n>>> +        border_height = 0;\n>>> +\n>>> +    /*\n>>> +     * For debayer interpolation a border is kept around the entire \n>>> image\n>>> +     * and the minimum output size is pattern-height x pattern-width.\n>>> +     */\n>>> +    if (inputSize.width < (3 * pattern_size.width) ||\n>>> +        inputSize.height < (2 * border_height + pattern_size.height)) {\n>>> +        LOG(Debayer, Warning)\n>>> +            << \"Input format size too small: \" << inputSize.toString();\n>>> +        return {};\n>>> +    }\n>>> +\n>>> +    return SizeRange(Size(pattern_size.width, pattern_size.height),\n>>> +             Size((inputSize.width - 2 * pattern_size.width) & \n>>> ~(pattern_size.width - 1),\n>>> +                  (inputSize.height - 2 * border_height) & \n>>> ~(pattern_size.height - 1)),\n>>> +             pattern_size.width, pattern_size.height);\n>>> +}\n>>> +\n>>> +} /* namespace libcamera */\n>>> diff --git a/src/libcamera/software_isp/debayer_cpu.h \n>>> b/src/libcamera/software_isp/debayer_cpu.h\n>>> new file mode 100644\n>>> index 00000000..e0c4b9a8\n>>> --- /dev/null\n>>> +++ b/src/libcamera/software_isp/debayer_cpu.h\n>>> @@ -0,0 +1,143 @@\n>>> +/* SPDX-License-Identifier: LGPL-2.1-or-later */\n>>> +/*\n>>> + * Copyright (C) 2023, Linaro Ltd\n>>> + * Copyright (C) 2023, Red Hat Inc.\n>>> + *\n>>> + * Authors:\n>>> + * Hans de Goede <hdegoede@redhat.com>\n>>> + *\n>>> + * debayer_cpu.h - CPU based debayering header\n>>> + */\n>>> +\n>>> +#pragma once\n>>> +\n>>> +#include <memory>\n>>> +#include <stdint.h>\n>>> +#include <vector>\n>>> +\n>>> +#include <libcamera/base/object.h>\n>>> +\n>>> +#include \"debayer.h\"\n>>> +#include \"swstats_cpu.h\"\n>>> +\n>>> +namespace libcamera {\n>>> +\n>>> +class DebayerCpu : public Debayer, public Object\n>>> +{\n>>> +public:\n>>> +    DebayerCpu(std::unique_ptr<SwStatsCpu> stats);\n>>> +    ~DebayerCpu();\n>>> +\n>>> +    int configure(const StreamConfiguration &inputCfg,\n>>> +              const \n>>> std::vector<std::reference_wrapper<StreamConfiguration>> &outputCfgs);\n>>> +    Size patternSize(PixelFormat inputFormat);\n>>> +    std::vector<PixelFormat> formats(PixelFormat input);\n>>> +    std::tuple<unsigned int, unsigned int>\n>>> +    strideAndFrameSize(const PixelFormat &outputFormat, const Size \n>>> &size);\n>>> +    void process(FrameBuffer *input, FrameBuffer *output, \n>>> DebayerParams params);\n>>> +    SizeRange sizes(PixelFormat inputFormat, const Size &inputSize);\n>>> +\n>>> +    /**\n>>> +     * \\brief Get the file descriptor for the statistics.\n>>> +     *\n>>> +     * \\return the file descriptor pointing to the statistics.\n>>> +     */\n>>> +    const SharedFD &getStatsFD() { return stats_->getStatsFD(); }\n>>> +\n>>> +    /**\n>>> +     * \\brief Get the output frame size.\n>>> +     *\n>>> +     * \\return The output frame size.\n>>> +     */\n>>> +    unsigned int frameSize() { return outputConfig_.frameSize; }\n>>> +\n>>> +private:\n>>> +    /**\n>>> +     * \\brief Called to debayer 1 line of Bayer input data to output \n>>> format\n>>> +     * \\param[out] dst Pointer to the start of the output line to write\n>>> +     * \\param[in] src The input data\n>>> +     *\n>>> +     * Input data is an array of (patternSize_.height + 1) src\n>>> +     * pointers each pointing to a line in the Bayer source. The middle\n>>> +     * element of the array will point to the actual line being \n>>> processed.\n>>> +     * Earlier element(s) will point to the previous line(s) and later\n>>> +     * element(s) to the next line(s).\n>>> +     *\n>>> +     * These functions take an array of src pointers, rather then\n>>> +     * a single src pointer + a stride for the source, so that when \n>>> the src\n>>> +     * is slow uncached memory it can be copied to faster memory before\n>>> +     * debayering. Debayering a standard 2x2 Bayer pattern requires \n>>> access\n>>> +     * to the previous and next src lines for interpolating the missing\n>>> +     * colors. To allow copying the src lines only once 3 buffers each\n>>> +     * holding a single line are used, re-using the oldest buffer for\n>>> +     * the next line and the pointers are swizzled so that:\n>>> +     * src[0] = previous-line, src[1] = currrent-line, src[2] = \n>>> next-line.\n>>> +     * This way the 3 pointers passed to the debayer functions form\n>>> +     * a sliding window over the src avoiding the need to copy each\n>>> +     * line more then once.\n>>> +     *\n>>> +     * Similarly for bayer patterns which repeat every 4 lines, 5 src\n>>> +     * pointers are passed holding: src[0] = 2-lines-up, src[1] = \n>>> 1-line-up\n>>> +     * src[2] = current-line, src[3] = 1-line-down, src[4] = \n>>> 2-lines-down.\n>>> +     */\n>>> +    typedef void (DebayerCpu::*debayerFn)(uint8_t *dst, const \n>>> uint8_t *src[]);\n>>> +\n>>> +    /* CSI-2 packed 10-bit raw bayer format (all the 4 orders) */\n>>> +    void debayer10P_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]);\n>>> +    void debayer10P_GRGR_BGR888(uint8_t *dst, const uint8_t *src[]);\n>>> +    void debayer10P_GBGB_BGR888(uint8_t *dst, const uint8_t *src[]);\n>>> +    void debayer10P_RGRG_BGR888(uint8_t *dst, const uint8_t *src[]);\n>>> +\n>>> +    struct DebayerInputConfig {\n>>> +        Size patternSize;\n>>> +        unsigned int bpp; /* Memory used per pixel, not precision */\n>>> +        unsigned int stride;\n>>> +        std::vector<PixelFormat> outputFormats;\n>>> +    };\n>>> +\n>>> +    struct DebayerOutputConfig {\n>>> +        unsigned int bpp; /* Memory used per pixel, not precision */\n>>> +        unsigned int stride;\n>>> +        unsigned int frameSize;\n>>> +    };\n>>> +\n>>> +    int getInputConfig(PixelFormat inputFormat, DebayerInputConfig \n>>> &config);\n>>> +    int getOutputConfig(PixelFormat outputFormat, \n>>> DebayerOutputConfig &config);\n>>> +    int setDebayerFunctions(PixelFormat inputFormat, PixelFormat \n>>> outputFormat);\n>>> +    void setupInputMemcpy(const uint8_t *linePointers[]);\n>>> +    void shiftLinePointers(const uint8_t *linePointers[], const \n>>> uint8_t *src);\n>>> +    void memcpyNextLine(const uint8_t *linePointers[]);\n>>> +    void process2(const uint8_t *src, uint8_t *dst);\n>>> +    void process4(const uint8_t *src, uint8_t *dst);\n>>> +\n>>> +    static constexpr unsigned int kGammaLookupSize = 1024;\n>>> +    static constexpr unsigned int kRGBLookupSize = 256;\n>>> +    /* Max. supported Bayer pattern height is 4, debayering this \n>>> requires 5 lines */\n>>> +    static constexpr unsigned int kMaxLineBuffers = 5;\n>>> +\n>>> +    std::array<uint8_t, kGammaLookupSize> gamma_;\n>>> +    std::array<uint8_t, kRGBLookupSize> red_;\n>>> +    std::array<uint8_t, kRGBLookupSize> green_;\n>>> +    std::array<uint8_t, kRGBLookupSize> blue_;\n>>> +    debayerFn debayer0_;\n>>> +    debayerFn debayer1_;\n>>> +    debayerFn debayer2_;\n>>> +    debayerFn debayer3_;\n>>> +    Rectangle window_;\n>>> +    DebayerInputConfig inputConfig_;\n>>> +    DebayerOutputConfig outputConfig_;\n>>> +    std::unique_ptr<SwStatsCpu> stats_;\n>>> +    uint8_t *lineBuffers_[kMaxLineBuffers];\n>>> +    unsigned int lineBufferLength_;\n>>> +    unsigned int lineBufferPadding_;\n>>> +    unsigned int lineBufferIndex_;\n>>> +    bool enableInputMemcpy_;\n>>> +    float gamma_correction_;\n>>> +    unsigned int measuredFrames_;\n>>> +    int64_t frameProcessTime_;\n>>> +    /* Skip 30 frames for things to stabilize then measure 30 frames */\n>>> +    static constexpr unsigned int kFramesToSkip = 30;\n>>> +    static constexpr unsigned int kLastFrameToMeasure = 60;\n>>> +};\n>>> +\n>>> +} /* namespace libcamera */\n>>> diff --git a/src/libcamera/software_isp/meson.build \n>>> b/src/libcamera/software_isp/meson.build\n>>> index 92fc90f3..dd5e70c1 100644\n>>> --- a/src/libcamera/software_isp/meson.build\n>>> +++ b/src/libcamera/software_isp/meson.build\n>>> @@ -2,5 +2,6 @@\n>>>   libcamera_sources += files([\n>>>       'debayer.cpp',\n>>> +    'debayer_cpu.cpp',\n>>>       'swstats_cpu.cpp',\n>>>   ])\n>>","headers":{"Return-Path":"<libcamera-devel-bounces@lists.libcamera.org>","X-Original-To":"parsemail@patchwork.libcamera.org","Delivered-To":"parsemail@patchwork.libcamera.org","Received":["from lancelot.ideasonboard.com (lancelot.ideasonboard.com\n\t[92.243.16.209])\n\tby patchwork.libcamera.org (Postfix) with ESMTPS id EAF75C3257\n\tfor <parsemail@patchwork.libcamera.org>;\n\tTue, 20 Feb 2024 12:26:20 +0000 (UTC)","from lancelot.ideasonboard.com (localhost [IPv6:::1])\n\tby lancelot.ideasonboard.com (Postfix) with ESMTP id 1602361CAA;\n\tTue, 20 Feb 2024 13:26:20 +0100 (CET)","from perceval.ideasonboard.com (perceval.ideasonboard.com\n\t[213.167.242.64])\n\tby lancelot.ideasonboard.com (Postfix) with ESMTPS id 76FA161CA3\n\tfor <libcamera-devel@lists.libcamera.org>;\n\tTue, 20 Feb 2024 13:26:18 +0100 (CET)","from [IPV6:2a00:6020:448c:6c00:8d25:d85f:6c20:5cde] (unknown\n\t[IPv6:2a00:6020:448c:6c00:8d25:d85f:6c20:5cde])\n\tby perceval.ideasonboard.com (Postfix) with ESMTPSA id B3FCB13AC;\n\tTue, 20 Feb 2024 13:26:10 +0100 (CET)"],"Authentication-Results":"lancelot.ideasonboard.com; dkim=pass (1024-bit key;\n\tunprotected) header.d=ideasonboard.com header.i=@ideasonboard.com\n\theader.b=\"Z9h2eBYD\"; dkim-atps=neutral","DKIM-Signature":"v=1; a=rsa-sha256; c=relaxed/simple; d=ideasonboard.com;\n\ts=mail; t=1708431971;\n\tbh=0oC/yjIsYFKApf8Quw3o+tonawpP+3H2s9p/zH528Kc=;\n\th=Date:Subject:To:Cc:References:From:In-Reply-To:From;\n\tb=Z9h2eBYD3IrpIjri61QPXqklpoEesO+HG2t4MUT7ow1ZMBXIPHUoVO5rmEZ+NEaAN\n\tLEYuSjbxFEqECP1wX/PMuz19bovZ6ymSH8LjphgpoRHpP/KiBnJ5swE8rTecv9CCp2\n\tgw3EPwHXhxXu1A4yEWonBpfpHIDQ9r+VtZiWraQk=","Message-ID":"<dce98834-99ab-4941-aceb-28142ec998f9@ideasonboard.com>","Date":"Tue, 20 Feb 2024 13:26:14 +0100","MIME-Version":"1.0","User-Agent":"Mozilla Thunderbird","Subject":"Re: [PATCH v3 08/16] libcamera: software_isp: Add DebayerCpu class","Content-Language":"en-US","To":"Andrei Konovalov <andrey.konovalov.ynk@gmail.com>,\n\tHans de Goede <hdegoede@redhat.com>, libcamera-devel@lists.libcamera.org","References":"<20240214170122.60754-1-hdegoede@redhat.com>\n\t<20240214170122.60754-9-hdegoede@redhat.com>\n\t<659d6c77-e8f3-4d8a-abeb-95c791c42662@ideasonboard.com>\n\t<731614fe-d28f-4049-97ca-e7d637df893a@gmail.com>","From":"Stefan Klug <stefan.klug@ideasonboard.com>","In-Reply-To":"<731614fe-d28f-4049-97ca-e7d637df893a@gmail.com>","Content-Type":"text/plain; charset=UTF-8; format=flowed","Content-Transfer-Encoding":"8bit","X-BeenThere":"libcamera-devel@lists.libcamera.org","X-Mailman-Version":"2.1.29","Precedence":"list","List-Id":"<libcamera-devel.lists.libcamera.org>","List-Unsubscribe":"<https://lists.libcamera.org/options/libcamera-devel>,\n\t<mailto:libcamera-devel-request@lists.libcamera.org?subject=unsubscribe>","List-Archive":"<https://lists.libcamera.org/pipermail/libcamera-devel/>","List-Post":"<mailto:libcamera-devel@lists.libcamera.org>","List-Help":"<mailto:libcamera-devel-request@lists.libcamera.org?subject=help>","List-Subscribe":"<https://lists.libcamera.org/listinfo/libcamera-devel>,\n\t<mailto:libcamera-devel-request@lists.libcamera.org?subject=subscribe>","Cc":"Bryan O'Donoghue <bryan.odonoghue@linaro.org>,\n\tMaxime Ripard <mripard@redhat.com>, Pavel Machek <pavel@ucw.cz>,\n\tDennis Bonke <admin@dennisbonke.com>","Errors-To":"libcamera-devel-bounces@lists.libcamera.org","Sender":"\"libcamera-devel\" <libcamera-devel-bounces@lists.libcamera.org>"}},{"id":28757,"web_url":"https://patchwork.libcamera.org/comment/28757/","msgid":"<996640ef-8fd5-43ed-bd83-7d5f71fb3782@redhat.com>","date":"2024-02-27T13:23:41","subject":"Re: [PATCH v3 08/16] libcamera: software_isp: Add DebayerCpu class","submitter":{"id":102,"url":"https://patchwork.libcamera.org/api/people/102/","name":"Hans de Goede","email":"hdegoede@redhat.com"},"content":"Hi,\n\nOn 2/15/24 16:33, Milan Zamazal wrote:\n> Hans de Goede <hdegoede@redhat.com> writes:\n> \n>> Add CPU based debayering implementation. This initial implementation\n>> only supports debayering packed 10 bits per pixel bayer data in\n>> the 4 standard bayer orders.\n\n<snip>\n\n>> +SizeRange DebayerCpu::sizes(PixelFormat inputFormat, const Size &inputSize)\n>> +{\n>> +\tSize pattern_size = patternSize(inputFormat);\n>> +\tunsigned int border_height = pattern_size.height;\n>> +\n>> +\tif (pattern_size.isNull())\n>> +\t\treturn {};\n>> +\n>> +\t/* No need for top/bottom border with a pattern height of 2 */\n>> +\tif (pattern_size.height == 2)\n>> +\t\tborder_height = 0;\n>> +\n>> +\t/*\n>> +\t * For debayer interpolation a border is kept around the entire image\n>> +\t * and the minimum output size is pattern-height x pattern-width.\n>> +\t */\n> \n> What if the output size is larger?  The border is quite impractical because it\n> forces (or not?) the output size to be non-standard, assuming the camera\n> provides common resolutions.  Consider e.g. full-HD camera resolution not being\n> able to be output 1:1 to a full-HD display.\n\nHardware ISPs also need a similar border, because including special hardware\nto deal with the edges would be quite expensive both in silicon area as\nwell as in power consumption. So sensors typically have a slightly bigger\nresolution then the standard resolutions. E.g. the ov2680 sensor has a\npixelarray of 1616x1216 pixels and the ov2740 used in ThinkPads has\n1932x1092 pixels.\n\nHW ISPs use the extra pixels both for interpolation near the border as\nwell as to be able to use a crop window starting at 0x1 1x0 or 1x1 to\nshift the bayer pattern so that the hw only needs to support a single\nbayer pattern.\n\n<snip>\n\n>> +private:\n>> +\t/**\n>> +\t * \\brief Called to debayer 1 line of Bayer input data to output format\n>> +\t * \\param[out] dst Pointer to the start of the output line to write\n>> +\t * \\param[in] src The input data\n>> +\t *\n>> +\t * Input data is an array of (patternSize_.height + 1) src\n>> +\t * pointers each pointing to a line in the Bayer source. The middle\n>> +\t * element of the array will point to the actual line being processed.\n>> +\t * Earlier element(s) will point to the previous line(s) and later\n>> +\t * element(s) to the next line(s).\n>> +\t *\n>> +\t * These functions take an array of src pointers, rather then\n>> +\t * a single src pointer + a stride for the source, so that when the src\n>> +\t * is slow uncached memory it can be copied to faster memory before\n>> +\t * debayering. Debayering a standard 2x2 Bayer pattern requires access\n>> +\t * to the previous and next src lines for interpolating the missing\n>> +\t * colors. To allow copying the src lines only once 3 buffers each\n> \n> I'd avoid using the term \"buffer\" here to avoid any confusion with input and\n> output buffers.\n\nBut they are buffers, I have added temporary there now so that this now\nreads \"To allow copying the src lines only once 3 temporary buffers each ...\"\nto make clear these are not the input / output buffers.\n\n> \n>> +\t * holding a single line are used, re-using the oldest buffer for\n>> +\t * the next line and the pointers are swizzled so that:\n>> +\t * src[0] = previous-line, src[1] = currrent-line, src[2] = next-line.\n>> +\t * This way the 3 pointers passed to the debayer functions form\n>> +\t * a sliding window over the src avoiding the need to copy each\n>> +\t * line more then once.\n> \n> then -> than\n> \n>> +\t *\n>> +\t * Similarly for bayer patterns which repeat every 4 lines, 5 src\n>> +\t * pointers are passed holding: src[0] = 2-lines-up, src[1] = 1-line-up\n>> +\t * src[2] = current-line, src[3] = 1-line-down, src[4] = 2-lines-down.\n>> +\t */\n\nThanks, both fixed in my personal tree.\n\nRegards,\n\nHans","headers":{"Return-Path":"<libcamera-devel-bounces@lists.libcamera.org>","X-Original-To":"parsemail@patchwork.libcamera.org","Delivered-To":"parsemail@patchwork.libcamera.org","Received":["from lancelot.ideasonboard.com (lancelot.ideasonboard.com\n\t[92.243.16.209])\n\tby patchwork.libcamera.org (Postfix) with ESMTPS id EC6C1BD160\n\tfor <parsemail@patchwork.libcamera.org>;\n\tTue, 27 Feb 2024 13:23:49 +0000 (UTC)","from lancelot.ideasonboard.com (localhost [IPv6:::1])\n\tby lancelot.ideasonboard.com (Postfix) with ESMTP id 4FB3462865;\n\tTue, 27 Feb 2024 14:23:49 +0100 (CET)","from us-smtp-delivery-124.mimecast.com\n\t(us-smtp-delivery-124.mimecast.com [170.10.133.124])\n\tby lancelot.ideasonboard.com (Postfix) with ESMTPS id 5D055627FC\n\tfor <libcamera-devel@lists.libcamera.org>;\n\tTue, 27 Feb 2024 14:23:47 +0100 (CET)","from mail-lf1-f71.google.com (mail-lf1-f71.google.com\n\t[209.85.167.71]) by relay.mimecast.com with ESMTP with STARTTLS\n\t(version=TLSv1.3, cipher=TLS_AES_256_GCM_SHA384) id\n\tus-mta-554-FPedNryaM5-gyHHdQfUVuQ-1; Tue, 27 Feb 2024 08:23:45 -0500","by mail-lf1-f71.google.com with SMTP id\n\t2adb3069b0e04-513136edf3bso408625e87.0\n\tfor <libcamera-devel@lists.libcamera.org>;\n\tTue, 27 Feb 2024 05:23:44 -0800 (PST)","from ?IPV6:2001:1c00:c32:7800:5bfa:a036:83f0:f9ec?\n\t(2001-1c00-0c32-7800-5bfa-a036-83f0-f9ec.cable.dynamic.v6.ziggo.nl.\n\t[2001:1c00:c32:7800:5bfa:a036:83f0:f9ec])\n\tby smtp.gmail.com with ESMTPSA id\n\tg20-20020a056402091400b0056452477a5esm763339edz.24.2024.02.27.05.23.42\n\t(version=TLS1_3 cipher=TLS_AES_128_GCM_SHA256 bits=128/128);\n\tTue, 27 Feb 2024 05:23:42 -0800 (PST)"],"Authentication-Results":"lancelot.ideasonboard.com;\n\tdkim=fail reason=\"signature verification failed\" (1024-bit key;\n\tunprotected) header.d=redhat.com header.i=@redhat.com\n\theader.b=\"N6QJYZSz\"; dkim-atps=neutral","DKIM-Signature":"v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com;\n\ts=mimecast20190719; t=1709040226;\n\th=from:from:reply-to:subject:subject:date:date:message-id:message-id:\n\tto:to:cc:cc:mime-version:mime-version:content-type:content-type:\n\tcontent-transfer-encoding:content-transfer-encoding:\n\tin-reply-to:in-reply-to:references:references;\n\tbh=PMr8ZqxZ7nbc6YqO94LfQB702CeEkHvVOt2S/GePecQ=;\n\tb=N6QJYZSzfyjXdFKIN64vkE12vQWjX13ctl2glPzkzZ9PaXjvOPZfdta+GQZqtbc55+dg0D\n\tTz1n3rDPali3qZ7gq3dWX4mtMmrCRw+YJprh+LVyUXneucG+Oyy+l1g4VkpxTPJoYnUdSO\n\tGr4/AeG2XVJTcbfPOD+hpAfEpFlr5Gk=","X-MC-Unique":"FPedNryaM5-gyHHdQfUVuQ-1","X-Google-DKIM-Signature":"v=1; a=rsa-sha256; c=relaxed/relaxed;\n\td=1e100.net; s=20230601; t=1709040223; x=1709645023;\n\th=content-transfer-encoding:in-reply-to:from:references:cc:to\n\t:content-language:subject:user-agent:mime-version:date:message-id\n\t:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to;\n\tbh=PMr8ZqxZ7nbc6YqO94LfQB702CeEkHvVOt2S/GePecQ=;\n\tb=J0dLEbCPcp/b+SIcTjJJRwkaW2tvh8NUelj/1+H92MBo5PAv9KTQES+W5Dwt9pETQk\n\tIOUMUnHS7k5I668EXrPqovHllsOt8HDbIPi5RnsxaIYV1Pve0F7WOfd2Q6Lk+Qg4nMS0\n\tO3lAiJFnGwHgrqOzWbUb8kBLDrdq1X59Pz3rBojw1RtzIFfDGkZYtJaGQmA8BFgtF36R\n\tcBpxMRdwseKjMpQSYMyTM5m558czshSUAd06wED7u1vCH8vUMIkGB5zyWOM51cefhp83\n\tTf9pM5MdmowGGJVmfJu9mCKzg3TwSZh7Sl4Ka/ZyRGNx4xL5Q6+yfyPE//SwxEBnAH6F\n\t4kNg==","X-Gm-Message-State":"AOJu0Yw+LAR1zgasUZWqQ70cPODfaga6Tmg5M3KgJLslBQoqkmMy4das\n\tYlVMYnOtga9Fz8YPPJOt4zW6tX0T0ndOclY1a6sufYxGBN9750JcHslL6lVzngf3dbamsEk7aC3\n\t17Qmi0xbHfje4rDOqWE8ZBavParhrrNV6yJO2v9uHiv2tNNUibbudOhX6BkbYmmaAeF9Xiag3yn\n\t5wIKc=","X-Received":["by 2002:a05:6512:3b07:b0:512:f6da:e52e with SMTP id\n\tf7-20020a0565123b0700b00512f6dae52emr5330289lfv.28.1709040223351; \n\tTue, 27 Feb 2024 05:23:43 -0800 (PST)","by 2002:a05:6512:3b07:b0:512:f6da:e52e with SMTP id\n\tf7-20020a0565123b0700b00512f6dae52emr5330276lfv.28.1709040222939; \n\tTue, 27 Feb 2024 05:23:42 -0800 (PST)"],"X-Google-Smtp-Source":"AGHT+IH9Uni6idY7iPYkNI6Xdj4TRsiDzOPnIhVOgFezHnQZ3/vKoeYDmHBZKXrAoefiDqxBMAjksg==","Message-ID":"<996640ef-8fd5-43ed-bd83-7d5f71fb3782@redhat.com>","Date":"Tue, 27 Feb 2024 14:23:41 +0100","MIME-Version":"1.0","User-Agent":"Mozilla Thunderbird","Subject":"Re: [PATCH v3 08/16] libcamera: software_isp: Add DebayerCpu class","To":"Milan Zamazal <mzamazal@redhat.com>","References":"<20240214170122.60754-1-hdegoede@redhat.com>\n\t<20240214170122.60754-9-hdegoede@redhat.com>\n\t<87bk8hd9ng.fsf@redhat.com>","From":"Hans de Goede <hdegoede@redhat.com>","In-Reply-To":"<87bk8hd9ng.fsf@redhat.com>","X-Mimecast-Spam-Score":"0","X-Mimecast-Originator":"redhat.com","Content-Language":"en-US, nl","Content-Type":"text/plain; charset=UTF-8","Content-Transfer-Encoding":"7bit","X-BeenThere":"libcamera-devel@lists.libcamera.org","X-Mailman-Version":"2.1.29","Precedence":"list","List-Id":"<libcamera-devel.lists.libcamera.org>","List-Unsubscribe":"<https://lists.libcamera.org/options/libcamera-devel>,\n\t<mailto:libcamera-devel-request@lists.libcamera.org?subject=unsubscribe>","List-Archive":"<https://lists.libcamera.org/pipermail/libcamera-devel/>","List-Post":"<mailto:libcamera-devel@lists.libcamera.org>","List-Help":"<mailto:libcamera-devel-request@lists.libcamera.org?subject=help>","List-Subscribe":"<https://lists.libcamera.org/listinfo/libcamera-devel>,\n\t<mailto:libcamera-devel-request@lists.libcamera.org?subject=subscribe>","Cc":"Maxime Ripard <mripard@redhat.com>, libcamera-devel@lists.libcamera.org, \n\tPavel Machek <pavel@ucw.cz>,\n\tBryan O'Donoghue <bryan.odonoghue@linaro.org>, \n\tDennis Bonke <admin@dennisbonke.com>","Errors-To":"libcamera-devel-bounces@lists.libcamera.org","Sender":"\"libcamera-devel\" <libcamera-devel-bounces@lists.libcamera.org>"}},{"id":28758,"web_url":"https://patchwork.libcamera.org/comment/28758/","msgid":"<4a340033-92f8-444b-abc2-2ddff33698c8@redhat.com>","date":"2024-02-27T13:25:42","subject":"Re: [PATCH v3 08/16] libcamera: software_isp: Add DebayerCpu class","submitter":{"id":102,"url":"https://patchwork.libcamera.org/api/people/102/","name":"Hans de Goede","email":"hdegoede@redhat.com"},"content":"Hi Stefan,\n\nOn 2/20/24 13:26, Stefan Klug wrote:\n> Hi Andrei,\n> \n> Am 20.02.24 um 11:54 schrieb Andrei Konovalov:\n>> Hi Stefan,\n>>\n>> On 19.02.2024 20:09, Stefan Klug wrote:\n>>> Hi Hans,\n>>>\n>>> thanks for your work on the SoftISP.\n>>>\n>>> Am 14.02.24 um 18:01 schrieb Hans de Goede:\n>>>> Add CPU based debayering implementation. This initial implementation\n>>>> only supports debayering packed 10 bits per pixel bayer data in\n>>>> the 4 standard bayer orders.\n>>>>\n>>>> Doxygen documentation by Dennis Bonke.\n>>>>\n>>>> Tested-by: Bryan O'Donoghue <bryan.odonoghue@linaro.org> # sc8280xp Lenovo x13s\n>>>> Tested-by: Pavel Machek <pavel@ucw.cz>\n>>>> Reviewed-by: Pavel Machek <pavel@ucw.cz>\n>>>> Co-developed-by: Dennis Bonke <admin@dennisbonke.com>\n>>>> Signed-off-by: Dennis Bonke <admin@dennisbonke.com>\n>>>> Co-developed-by: Andrey Konovalov <andrey.konovalov@linaro.org>\n>>>> Signed-off-by: Andrey Konovalov <andrey.konovalov@linaro.org>\n>>>> Co-developed-by: Pavel Machek <pavel@ucw.cz>\n>>>> Signed-off-by: Pavel Machek <pavel@ucw.cz>\n>>>> Signed-off-by: Hans de Goede <hdegoede@redhat.com>\n>>>> ---\n>>>> Changes in v3:\n>>>> - Move debayer_cpu.h to src/libcamera/software_isp/\n>>>> - Move documentation to .cpp file\n>>>> - Document how/why an array of src pointers is passed to\n>>>>    the debayer functions\n>>>> ---\n>>>>   src/libcamera/software_isp/debayer_cpu.cpp | 619 +++++++++++++++++++++\n>>>>   src/libcamera/software_isp/debayer_cpu.h   | 143 +++++\n>>>>   src/libcamera/software_isp/meson.build     |   1 +\n>>>>   3 files changed, 763 insertions(+)\n>>>>   create mode 100644 src/libcamera/software_isp/debayer_cpu.cpp\n>>>>   create mode 100644 src/libcamera/software_isp/debayer_cpu.h\n>>>>\n>>>> diff --git a/src/libcamera/software_isp/debayer_cpu.cpp b/src/libcamera/software_isp/debayer_cpu.cpp\n>>>> new file mode 100644\n>>>> index 00000000..53e90776\n>>>> --- /dev/null\n>>>> +++ b/src/libcamera/software_isp/debayer_cpu.cpp\n>>>> @@ -0,0 +1,619 @@\n>>>> +/* SPDX-License-Identifier: LGPL-2.1-or-later */\n>>>> +/*\n>>>> + * Copyright (C) 2023, Linaro Ltd\n>>>> + * Copyright (C) 2023, Red Hat Inc.\n>>>> + *\n>>>> + * Authors:\n>>>> + * Hans de Goede <hdegoede@redhat.com>\n>>>> + *\n>>>> + * debayer_cpu.cpp - CPU based debayering class\n>>>> + */\n>>>> +\n>>>> +#include \"debayer_cpu.h\"\n>>>> +\n>>>> +#include <math.h>\n>>>> +#include <stdlib.h>\n>>>> +#include <time.h>\n>>>> +\n>>>> +#include <libcamera/formats.h>\n>>>> +\n>>>> +#include \"libcamera/internal/bayer_format.h\"\n>>>> +#include \"libcamera/internal/framebuffer.h\"\n>>>> +#include \"libcamera/internal/mapped_framebuffer.h\"\n>>>> +\n>>>> +namespace libcamera {\n>>>> +\n>>>> +/**\n>>>> + * \\class DebayerCpu\n>>>> + * \\brief Class for debayering on the CPU\n>>>> + *\n>>>> + * Implementation for CPU based debayering\n>>>> + */\n>>>> +\n>>>> +/**\n>>>> + * \\brief Constructs a DebayerCpu object.\n>>>> + * \\param[in] stats Pointer to the stats object to use.\n>>>> + */\n>>>> +DebayerCpu::DebayerCpu(std::unique_ptr<SwStatsCpu> stats)\n>>>\n>>> I tried to use the ISP without statistics/regulation altogether and just set stats to a nullptr. This fails in a few places. IMHO it would improve flexibility & reusability to allow that.\n>>> Attched is a patch with the modifications as I needed them anyways. Feel free to include them.\n>>\n>> Your patch itself looks OK for me (I haven't tested it though).\n>>\n>> But I am curious what is the reason for not using stats?\n>> Leaving the debayer step only, one looses AWB (which doesn't need any particular\n>> support from the hardware and prevents the typical raw bayer green tint) and AE/AGC (which only\n>> needs at least one of the two camera sensor controls, and with wrong exposure the\n>> image may loose the information, and this can't be fully compensated by post processing).\n> \n> I was working on the camera sensor implementation in libcamera and needed to quickly display the debayered sensor image. No auto regulation should happen as I wanted to manually control gain/exposure. One could even expose manual whitebalance gains for usecases under known conditions. Having the SoftIsp as a modular playground for such cases is quite helpful.\n\nI too have no objections against the patch, but I wonder if it is necessary to add\nall the NULL pointer checks to the SoftIPA ? Would it not be better to just not create\nthe SoftIPA at all in this case ?\n\nNote either way is fine, just wondering ...\n\nRegards,\n\nHans","headers":{"Return-Path":"<libcamera-devel-bounces@lists.libcamera.org>","X-Original-To":"parsemail@patchwork.libcamera.org","Delivered-To":"parsemail@patchwork.libcamera.org","Received":["from lancelot.ideasonboard.com (lancelot.ideasonboard.com\n\t[92.243.16.209])\n\tby patchwork.libcamera.org (Postfix) with ESMTPS id 1F5E2BD80A\n\tfor <parsemail@patchwork.libcamera.org>;\n\tTue, 27 Feb 2024 13:25:50 +0000 (UTC)","from lancelot.ideasonboard.com (localhost [IPv6:::1])\n\tby lancelot.ideasonboard.com (Postfix) with ESMTP id 65F5B62867;\n\tTue, 27 Feb 2024 14:25:49 +0100 (CET)","from us-smtp-delivery-124.mimecast.com\n\t(us-smtp-delivery-124.mimecast.com [170.10.129.124])\n\tby lancelot.ideasonboard.com (Postfix) with ESMTPS id 5B9BA62806\n\tfor <libcamera-devel@lists.libcamera.org>;\n\tTue, 27 Feb 2024 14:25:48 +0100 (CET)","from mail-ed1-f71.google.com (mail-ed1-f71.google.com\n\t[209.85.208.71]) by relay.mimecast.com with ESMTP with STARTTLS\n\t(version=TLSv1.3, cipher=TLS_AES_256_GCM_SHA384) id\n\tus-mta-269-Y_hagR75MnaB871WCwM73w-1; Tue, 27 Feb 2024 08:25:45 -0500","by mail-ed1-f71.google.com with SMTP id\n\t4fb4d7f45d1cf-56484d05dcaso2616095a12.0\n\tfor <libcamera-devel@lists.libcamera.org>;\n\tTue, 27 Feb 2024 05:25:44 -0800 (PST)","from ?IPV6:2001:1c00:c32:7800:5bfa:a036:83f0:f9ec?\n\t(2001-1c00-0c32-7800-5bfa-a036-83f0-f9ec.cable.dynamic.v6.ziggo.nl.\n\t[2001:1c00:c32:7800:5bfa:a036:83f0:f9ec])\n\tby smtp.gmail.com with ESMTPSA id\n\tcq11-20020a056402220b00b005660742bf6bsm762334edb.52.2024.02.27.05.25.42\n\t(version=TLS1_3 cipher=TLS_AES_128_GCM_SHA256 bits=128/128);\n\tTue, 27 Feb 2024 05:25:43 -0800 (PST)"],"Authentication-Results":"lancelot.ideasonboard.com; dkim=pass (1024-bit key;\n\tunprotected) header.d=redhat.com header.i=@redhat.com\n\theader.b=\"TRPxVcfy\"; dkim-atps=neutral","DKIM-Signature":"v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com;\n\ts=mimecast20190719; t=1709040347;\n\th=from:from:reply-to:subject:subject:date:date:message-id:message-id:\n\tto:to:cc:cc:mime-version:mime-version:content-type:content-type:\n\tcontent-transfer-encoding:content-transfer-encoding:\n\tin-reply-to:in-reply-to:references:references;\n\tbh=iFxMv6M+6uU/uDAQRZTmcQC+pjxAmdU7q1VqFyczbPM=;\n\tb=TRPxVcfy60BumjdNYonn4nk8JtzcUsQosRzbwIRZ2jhTQroi+c4EbF+TUyyAp1y29kVFqP\n\tZffEQbW0tlKoto6anV43JU8qk8ugsSEHKnkhTdEwXleMTcFnrXIOmef+xZSRXMlJVIAqqI\n\twEpfmRYppxp/8J+Sorwc/g1a7Q3/YJ0=","X-MC-Unique":"Y_hagR75MnaB871WCwM73w-1","X-Google-DKIM-Signature":"v=1; a=rsa-sha256; c=relaxed/relaxed;\n\td=1e100.net; s=20230601; t=1709040344; x=1709645144;\n\th=content-transfer-encoding:in-reply-to:from:references:cc:to\n\t:content-language:subject:user-agent:mime-version:date:message-id\n\t:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to;\n\tbh=iFxMv6M+6uU/uDAQRZTmcQC+pjxAmdU7q1VqFyczbPM=;\n\tb=f17zRa7xRBNsrSPosQy9L7NAaECFUo/+F6CFlV+2jdAa0bo4/5enhjqmofsCGq7+vK\n\tIs9Py3/AJgEHHDcq8RHbmY23iXq0iWtw97AeIGymVNte5lu7rVeR94AnDO1t/YWsMCq0\n\tsntdcamUocHC+s74QhPwsOKhmzLJhxFnxIwKMxqrz7/si6sAdS1v+NjSSCeMvG0BenTj\n\tDuQsv5QlGXBpqQZKi/ZWbdiw6ISbQfbqGP/IZSrPKaiQLIE72wAq8hu9nQ6GC+6a0kWG\n\tWrFNOEXNwayiKDfdfnIAiuD6Lqiaait5SkjtF3jpSnl0oNrMbe8UgrErRO01NIvrMwtk\n\tS2Qw==","X-Forwarded-Encrypted":"i=1;\n\tAJvYcCVbTby6S6Z1wV8S75AHLs9hptv7JIBwO8wFL/6E0J7RJWBXg6YZxz3eyPrPR5nGubdKv3A8idUPFyDA7yIs9fHxMMbqzImh5rSNv1odc9txpHP1CQ==","X-Gm-Message-State":"AOJu0YyrOXxoPiS+hMe/2v92gGsKfNdCtSMB4X8J5x62lz/hqmR1VrJz\n\tZlkQhEvo2uz/ESpbUWsVc5sUMZxnsR+oju6tbc/u9UbhXVInSzfK4fHLwNoiIdW8zPHVZycn9B0\n\tNkr+mDyt8ciMairBz3C2qAw+xKZPK95BrBCcWM3yU2GJfagF1TncPf3xNUxdFwF3j4UWyRco=","X-Received":["by 2002:a05:6402:686:b0:566:414d:d70e with SMTP id\n\tf6-20020a056402068600b00566414dd70emr1089823edy.39.1709040343925; \n\tTue, 27 Feb 2024 05:25:43 -0800 (PST)","by 2002:a05:6402:686:b0:566:414d:d70e with SMTP id\n\tf6-20020a056402068600b00566414dd70emr1089809edy.39.1709040343566; \n\tTue, 27 Feb 2024 05:25:43 -0800 (PST)"],"X-Google-Smtp-Source":"AGHT+IFDHWJ0AuNNGN1KI/twOwWEu/2h3+Q2ulihuMX9tXe6azRIXTvehlHEfAfHkxzd4EPwxVMtIw==","Message-ID":"<4a340033-92f8-444b-abc2-2ddff33698c8@redhat.com>","Date":"Tue, 27 Feb 2024 14:25:42 +0100","MIME-Version":"1.0","User-Agent":"Mozilla Thunderbird","Subject":"Re: [PATCH v3 08/16] libcamera: software_isp: Add DebayerCpu class","To":"Stefan Klug <stefan.klug@ideasonboard.com>,\n\tAndrei Konovalov <andrey.konovalov.ynk@gmail.com>,\n\tlibcamera-devel@lists.libcamera.org","References":"<20240214170122.60754-1-hdegoede@redhat.com>\n\t<20240214170122.60754-9-hdegoede@redhat.com>\n\t<659d6c77-e8f3-4d8a-abeb-95c791c42662@ideasonboard.com>\n\t<731614fe-d28f-4049-97ca-e7d637df893a@gmail.com>\n\t<dce98834-99ab-4941-aceb-28142ec998f9@ideasonboard.com>","From":"Hans de Goede <hdegoede@redhat.com>","In-Reply-To":"<dce98834-99ab-4941-aceb-28142ec998f9@ideasonboard.com>","X-Mimecast-Spam-Score":"0","X-Mimecast-Originator":"redhat.com","Content-Language":"en-US, nl","Content-Type":"text/plain; charset=UTF-8","Content-Transfer-Encoding":"8bit","X-BeenThere":"libcamera-devel@lists.libcamera.org","X-Mailman-Version":"2.1.29","Precedence":"list","List-Id":"<libcamera-devel.lists.libcamera.org>","List-Unsubscribe":"<https://lists.libcamera.org/options/libcamera-devel>,\n\t<mailto:libcamera-devel-request@lists.libcamera.org?subject=unsubscribe>","List-Archive":"<https://lists.libcamera.org/pipermail/libcamera-devel/>","List-Post":"<mailto:libcamera-devel@lists.libcamera.org>","List-Help":"<mailto:libcamera-devel-request@lists.libcamera.org?subject=help>","List-Subscribe":"<https://lists.libcamera.org/listinfo/libcamera-devel>,\n\t<mailto:libcamera-devel-request@lists.libcamera.org?subject=subscribe>","Cc":"Bryan O'Donoghue <bryan.odonoghue@linaro.org>,\n\tMaxime Ripard <mripard@redhat.com>, Pavel Machek <pavel@ucw.cz>,\n\tDennis Bonke <admin@dennisbonke.com>","Errors-To":"libcamera-devel-bounces@lists.libcamera.org","Sender":"\"libcamera-devel\" <libcamera-devel-bounces@lists.libcamera.org>"}},{"id":28793,"web_url":"https://patchwork.libcamera.org/comment/28793/","msgid":"<87le74u1nj.fsf@redhat.com>","date":"2024-02-28T14:03:12","subject":"Re: [PATCH v3 08/16] libcamera: software_isp: Add DebayerCpu class","submitter":{"id":177,"url":"https://patchwork.libcamera.org/api/people/177/","name":"Milan Zamazal","email":"mzamazal@redhat.com"},"content":"Hans de Goede <hdegoede@redhat.com> writes:\n\n> On 2/15/24 16:33, Milan Zamazal wrote:\n>> Hans de Goede <hdegoede@redhat.com> writes:\n>> \n>>> +SizeRange DebayerCpu::sizes(PixelFormat inputFormat, const Size &inputSize)\n>>> +{\n>>> +\tSize pattern_size = patternSize(inputFormat);\n>>> +\tunsigned int border_height = pattern_size.height;\n>>> +\n>>> +\tif (pattern_size.isNull())\n>>> +\t\treturn {};\n>>> +\n>>> +\t/* No need for top/bottom border with a pattern height of 2 */\n>>> +\tif (pattern_size.height == 2)\n>>> +\t\tborder_height = 0;\n>>> +\n>>> +\t/*\n>>> +\t * For debayer interpolation a border is kept around the entire image\n>>> +\t * and the minimum output size is pattern-height x pattern-width.\n>>> +\t */\n>> \n>> What if the output size is larger?  The border is quite impractical because it\n>> forces (or not?) the output size to be non-standard, assuming the camera\n>> provides common resolutions.  Consider e.g. full-HD camera resolution not being\n>> able to be output 1:1 to a full-HD display.\n>\n> Hardware ISPs also need a similar border, because including special hardware\n> to deal with the edges would be quite expensive both in silicon area as\n> well as in power consumption. So sensors typically have a slightly bigger\n> resolution then the standard resolutions. E.g. the ov2680 sensor has a\n> pixelarray of 1616x1216 pixels and the ov2740 used in ThinkPads has\n> 1932x1092 pixels.\n\nAnd can those extra pixels be obtained for softisp?  I guess so as my camera can\nprovide its full 3280x2464 resolution, which is a bit more than QUXGA resolution\n(3200x2400).  But when playing with different resolutions in the current branch,\nI get confused.\n\nWhen I request 1920x1080 resolution, 1920x1080 hardware resolution is selected\nand I get 1916x1080 output resolution, with distorted image due to y field of\nview being non-proportionally larger then x field of view (the image looks\nstretched horizontally or squeezed vertically).\n\nWhen I request 1924x1080, 3280x2464 hardware resolution is selected, the output\nresolution is 1924x1080, the frame rate is a bit lower and the image looks all\nright.\n\nIt looks like in the 1920x1080 case, this is what the hardware provides and the\nsoftisp has to cut off 4 horizontal pixels for the borders.  As for the\ndistortion, it looks like the image gets cropped from 3280 to 1924 horizontally\nwhile cropped from 2464 to something like 2160 and then compressed, rather than\ncropped, to 1080 vertically (a bug in a driver?).\n\nIn the 1924x1080 case, it looks like there are plenty of extra pixels, the\n(right) cropping happens elsewhere and there is no need to cut off anything in\nsoftisp from the requested resolution.\n\nI'm not sure my understanding is correct but anyway, I apparently cannot get\n1920x1080 output resolution (and probably other standard resolutions if\navailable directly from v4l2).  At least in theory, it should be possible to use\n3280x2464 for that but for the price of reduced performance.  Well, I think we\ncan live with this glitch in this version, but I'm still curious what's going on\nand whether things could be reasonably improved in future.\n\nRegards,\nMilan","headers":{"Return-Path":"<libcamera-devel-bounces@lists.libcamera.org>","X-Original-To":"parsemail@patchwork.libcamera.org","Delivered-To":"parsemail@patchwork.libcamera.org","Received":["from lancelot.ideasonboard.com (lancelot.ideasonboard.com\n\t[92.243.16.209])\n\tby patchwork.libcamera.org (Postfix) with ESMTPS id 4DBB6BD160\n\tfor <parsemail@patchwork.libcamera.org>;\n\tWed, 28 Feb 2024 14:03:22 +0000 (UTC)","from lancelot.ideasonboard.com (localhost [IPv6:::1])\n\tby lancelot.ideasonboard.com (Postfix) with ESMTP id 4B5E562868;\n\tWed, 28 Feb 2024 15:03:21 +0100 (CET)","from us-smtp-delivery-124.mimecast.com\n\t(us-smtp-delivery-124.mimecast.com [170.10.129.124])\n\tby lancelot.ideasonboard.com (Postfix) with ESMTPS id 1405661C94\n\tfor <libcamera-devel@lists.libcamera.org>;\n\tWed, 28 Feb 2024 15:03:18 +0100 (CET)","from mail-wm1-f69.google.com (mail-wm1-f69.google.com\n\t[209.85.128.69]) by relay.mimecast.com with ESMTP with STARTTLS\n\t(version=TLSv1.3, cipher=TLS_AES_256_GCM_SHA384) id\n\tus-mta-371-bb1iMtTjMwe2UYI00Sxf4Q-1; Wed, 28 Feb 2024 09:03:15 -0500","by mail-wm1-f69.google.com with SMTP id\n\t5b1f17b1804b1-412b7dafaa7so448025e9.0\n\tfor <libcamera-devel@lists.libcamera.org>;\n\tWed, 28 Feb 2024 06:03:15 -0800 (PST)","from nuthatch (ip-77-48-47-2.net.vodafone.cz. [77.48.47.2])\n\tby smtp.gmail.com with ESMTPSA id\n\tt8-20020a05600c198800b00412a7d9fb9csm2253400wmq.45.2024.02.28.06.03.13\n\t(version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256);\n\tWed, 28 Feb 2024 06:03:13 -0800 (PST)"],"Authentication-Results":"lancelot.ideasonboard.com;\n\tdkim=fail reason=\"signature verification failed\" (1024-bit key;\n\tunprotected) header.d=redhat.com header.i=@redhat.com\n\theader.b=\"JskdfHug\"; dkim-atps=neutral","DKIM-Signature":"v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com;\n\ts=mimecast20190719; t=1709128997;\n\th=from:from:reply-to:subject:subject:date:date:message-id:message-id:\n\tto:to:cc:cc:mime-version:mime-version:content-type:content-type:\n\tin-reply-to:in-reply-to:references:references;\n\tbh=SnDT6uexF5R7HLvwwMMC9N5cqTeVS2wIwzUa96mVFo4=;\n\tb=JskdfHugdxs1CxPN205vvlmAdbDeOfElc67/KIdvBr9fkQPO2XeatvPNwi+Fn1uXDtYjqQ\n\tlcuBfRyrM7z9DBgdaNp0h1DUmOU+l4tioTxGZsIk8OcjO7aUESzgMd4W0mpp3PBiKfqfBG\n\tzT0bxWYtjcUw/v42VNorxNakM8NhDO8=","X-MC-Unique":"bb1iMtTjMwe2UYI00Sxf4Q-1","X-Google-DKIM-Signature":"v=1; a=rsa-sha256; c=relaxed/relaxed;\n\td=1e100.net; s=20230601; t=1709128994; x=1709733794;\n\th=mime-version:user-agent:message-id:date:references:in-reply-to\n\t:subject:cc:to:from:x-gm-message-state:from:to:cc:subject:date\n\t:message-id:reply-to;\n\tbh=SnDT6uexF5R7HLvwwMMC9N5cqTeVS2wIwzUa96mVFo4=;\n\tb=Dfkbdhj1y/9Q9y9PEdAnvatP1x2Sot5XWkDGs6AjoZovwXiMekgRt0hR43SgmBmp3y\n\t1d3IasbiSdoPftvQU+eeAlEUkOnrSoMr5rMDSZ9Jm59OWL+mytvyDdRzcK0a8oW2f2T7\n\tIuqC6H9LSLNYRFY7Nfx2Zk3d+E1otSP20ZQkEmOV4QlHd6G++njG4OskZ0m2oj4kaYPM\n\tDAphxtEoRHwMTeXDpa5ZpOUdFHGMoYB9vOwNYu8XS6Awdgm1eeB85KfUisYdDMuMeiPv\n\twCCF/oqJyy/GZDW6ezRyzvJsOFzF9nnH/cBrIToxFXxZKMrL6BqxzzDbdf67mz5SaS8O\n\tvSAQ==","X-Gm-Message-State":"AOJu0YydsieP4nBfYZBJ+HHjUIPOUiNil3XjFkN0R8RY2sVWkB5bLtBj\n\t1tfcqvNmt/QGaZYW5YHo886qY30lBcvGpLvMMMlAIzuc24T+rov+HPEPIqkyhiQJIZbMJUYFITj\n\tQPKsR6satVc2RVULul90UHL+jv1jUq1DaoyMD/fpoYYq0dXBS7osXyfgX33VhbLflOf52peE=","X-Received":["by 2002:a05:600c:4748:b0:412:b816:1587 with SMTP id\n\tw8-20020a05600c474800b00412b8161587mr178551wmo.4.1709128994431; \n\tWed, 28 Feb 2024 06:03:14 -0800 (PST)","by 2002:a05:600c:4748:b0:412:b816:1587 with SMTP id\n\tw8-20020a05600c474800b00412b8161587mr178524wmo.4.1709128993969; \n\tWed, 28 Feb 2024 06:03:13 -0800 (PST)"],"X-Google-Smtp-Source":"AGHT+IGfRFpLgIByMdjpvUhd0AUUVjb5NbUUCYIlBNspFlki6gQVC1oLUBNBovmjit+3+yOlE7yf3w==","From":"Milan Zamazal <mzamazal@redhat.com>","To":"Hans de Goede <hdegoede@redhat.com>","Subject":"Re: [PATCH v3 08/16] libcamera: software_isp: Add DebayerCpu class","In-Reply-To":"<996640ef-8fd5-43ed-bd83-7d5f71fb3782@redhat.com> (Hans de\n\tGoede's message of \"Tue, 27 Feb 2024 14:23:41 +0100\")","References":"<20240214170122.60754-1-hdegoede@redhat.com>\n\t<20240214170122.60754-9-hdegoede@redhat.com>\n\t<87bk8hd9ng.fsf@redhat.com>\n\t<996640ef-8fd5-43ed-bd83-7d5f71fb3782@redhat.com>","Date":"Wed, 28 Feb 2024 15:03:12 +0100","Message-ID":"<87le74u1nj.fsf@redhat.com>","User-Agent":"Gnus/5.13 (Gnus v5.13)","MIME-Version":"1.0","X-Mimecast-Spam-Score":"0","X-Mimecast-Originator":"redhat.com","Content-Type":"text/plain","X-BeenThere":"libcamera-devel@lists.libcamera.org","X-Mailman-Version":"2.1.29","Precedence":"list","List-Id":"<libcamera-devel.lists.libcamera.org>","List-Unsubscribe":"<https://lists.libcamera.org/options/libcamera-devel>,\n\t<mailto:libcamera-devel-request@lists.libcamera.org?subject=unsubscribe>","List-Archive":"<https://lists.libcamera.org/pipermail/libcamera-devel/>","List-Post":"<mailto:libcamera-devel@lists.libcamera.org>","List-Help":"<mailto:libcamera-devel-request@lists.libcamera.org?subject=help>","List-Subscribe":"<https://lists.libcamera.org/listinfo/libcamera-devel>,\n\t<mailto:libcamera-devel-request@lists.libcamera.org?subject=subscribe>","Cc":"Maxime Ripard <mripard@redhat.com>, libcamera-devel@lists.libcamera.org, \n\tPavel Machek <pavel@ucw.cz>,\n\tBryan O'Donoghue <bryan.odonoghue@linaro.org>, \n\tDennis Bonke <admin@dennisbonke.com>","Errors-To":"libcamera-devel-bounces@lists.libcamera.org","Sender":"\"libcamera-devel\" <libcamera-devel-bounces@lists.libcamera.org>"}}]