Message ID | 20240311141524.27192-9-hdegoede@redhat.com |
---|---|
State | Superseded |
Headers | show |
Series |
|
Related | show |
Hans de Goede <hdegoede@redhat.com> writes: > Add CPU based debayering implementation. This initial implementation > only supports debayering packed 10 bits per pixel bayer data in > the 4 standard bayer orders. > > Doxygen documentation by Dennis Bonke. > > Tested-by: Bryan O'Donoghue <bryan.odonoghue@linaro.org> # sc8280xp Lenovo x13s > Tested-by: Pavel Machek <pavel@ucw.cz> > Reviewed-by: Pavel Machek <pavel@ucw.cz> > Co-developed-by: Dennis Bonke <admin@dennisbonke.com> > Signed-off-by: Dennis Bonke <admin@dennisbonke.com> > Co-developed-by: Andrey Konovalov <andrey.konovalov@linaro.org> > Signed-off-by: Andrey Konovalov <andrey.konovalov@linaro.org> > Co-developed-by: Pavel Machek <pavel@ucw.cz> > Signed-off-by: Pavel Machek <pavel@ucw.cz> > Signed-off-by: Hans de Goede <hdegoede@redhat.com> Reviewed-by: Milan Zamazal <mzamazal@redhat.com> > --- > Changes in v5: > - Rework debayer10P debayer functions to avoid: > clang -Werror,-Wfor-loop-analysis warnings turned into errors > > Changes in v3: > - Move debayer_cpu.h to src/libcamera/software_isp/ > - Move documentation to .cpp file > - Document how/why an array of src pointers is passed to > the debayer functions > --- > src/libcamera/software_isp/debayer_cpu.cpp | 626 +++++++++++++++++++++ > src/libcamera/software_isp/debayer_cpu.h | 143 +++++ > src/libcamera/software_isp/meson.build | 1 + > 3 files changed, 770 insertions(+) > create mode 100644 src/libcamera/software_isp/debayer_cpu.cpp > create mode 100644 src/libcamera/software_isp/debayer_cpu.h > > diff --git a/src/libcamera/software_isp/debayer_cpu.cpp b/src/libcamera/software_isp/debayer_cpu.cpp > new file mode 100644 > index 00000000..f932362c > --- /dev/null > +++ b/src/libcamera/software_isp/debayer_cpu.cpp > @@ -0,0 +1,626 @@ > +/* SPDX-License-Identifier: LGPL-2.1-or-later */ > +/* > + * Copyright (C) 2023, Linaro Ltd > + * Copyright (C) 2023, Red Hat Inc. > + * > + * Authors: > + * Hans de Goede <hdegoede@redhat.com> > + * > + * debayer_cpu.cpp - CPU based debayering class > + */ > + > +#include "debayer_cpu.h" > + > +#include <math.h> > +#include <stdlib.h> > +#include <time.h> > + > +#include <libcamera/formats.h> > + > +#include "libcamera/internal/bayer_format.h" > +#include "libcamera/internal/framebuffer.h" > +#include "libcamera/internal/mapped_framebuffer.h" > + > +namespace libcamera { > + > +/** > + * \class DebayerCpu > + * \brief Class for debayering on the CPU > + * > + * Implementation for CPU based debayering > + */ > + > +/** > + * \brief Constructs a DebayerCpu object. > + * \param[in] stats Pointer to the stats object to use. > + */ > +DebayerCpu::DebayerCpu(std::unique_ptr<SwStatsCpu> stats) > + : stats_(std::move(stats)), gamma_correction_(1.0) > +{ > +#ifdef __x86_64__ > + enableInputMemcpy_ = false; > +#else > + enableInputMemcpy_ = true; > +#endif > + /* Initialize gamma to 1.0 curve */ > + for (unsigned int i = 0; i < kGammaLookupSize; i++) > + gamma_[i] = i / (kGammaLookupSize / kRGBLookupSize); > + > + for (unsigned int i = 0; i < kMaxLineBuffers; i++) > + lineBuffers_[i] = nullptr; > +} > + > +DebayerCpu::~DebayerCpu() > +{ > + for (unsigned int i = 0; i < kMaxLineBuffers; i++) > + free(lineBuffers_[i]); > +} > + > +// RGR > +// GBG > +// RGR > +#define BGGR_BGR888(p, n, div) \ > + *dst++ = blue_[curr[x] / (div)]; \ > + *dst++ = green_[(prev[x] + curr[x - p] + curr[x + n] + next[x]) / (4 * (div))]; \ > + *dst++ = red_[(prev[x - p] + prev[x + n] + next[x - p] + next[x + n]) / (4 * (div))]; \ > + x++; > + > +// GBG > +// RGR > +// GBG > +#define GRBG_BGR888(p, n, div) \ > + *dst++ = blue_[(prev[x] + next[x]) / (2 * (div))]; \ > + *dst++ = green_[curr[x] / (div)]; \ > + *dst++ = red_[(curr[x - p] + curr[x + n]) / (2 * (div))]; \ > + x++; > + > +// GRG > +// BGB > +// GRG > +#define GBRG_BGR888(p, n, div) \ > + *dst++ = blue_[(curr[x - p] + curr[x + n]) / (2 * (div))]; \ > + *dst++ = green_[curr[x] / (div)]; \ > + *dst++ = red_[(prev[x] + next[x]) / (2 * (div))]; \ > + x++; > + > +// BGB > +// GRG > +// BGB > +#define RGGB_BGR888(p, n, div) \ > + *dst++ = blue_[(prev[x - p] + prev[x + n] + next[x - p] + next[x + n]) / (4 * (div))]; \ > + *dst++ = green_[(prev[x] + curr[x - p] + curr[x + n] + next[x]) / (4 * (div))]; \ > + *dst++ = red_[curr[x] / (div)]; \ > + x++; > + > +void DebayerCpu::debayer10P_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]) > +{ > + const int width_in_bytes = window_.width * 5 / 4; > + const uint8_t *prev = (const uint8_t *)src[0]; > + const uint8_t *curr = (const uint8_t *)src[1]; > + const uint8_t *next = (const uint8_t *)src[2]; > + > + /* > + * For the first pixel getting a pixel from the previous column uses > + * x - 2 to skip the 5th byte with least-significant bits for 4 pixels. > + * Same for last pixel (uses x + 2) and looking at the next column. > + */ > + for (int x = 0; x < width_in_bytes;) { > + /* First pixel */ > + BGGR_BGR888(2, 1, 1) > + /* Second pixel BGGR -> GBRG */ > + GBRG_BGR888(1, 1, 1) > + /* Same thing for third and fourth pixels */ > + BGGR_BGR888(1, 1, 1) > + GBRG_BGR888(1, 2, 1) > + /* Skip 5th src byte with 4 x 2 least-significant-bits */ > + x++; > + } > +} > + > +void DebayerCpu::debayer10P_GRGR_BGR888(uint8_t *dst, const uint8_t *src[]) > +{ > + const int width_in_bytes = window_.width * 5 / 4; > + const uint8_t *prev = (const uint8_t *)src[0]; > + const uint8_t *curr = (const uint8_t *)src[1]; > + const uint8_t *next = (const uint8_t *)src[2]; > + > + for (int x = 0; x < width_in_bytes;) { > + /* First pixel */ > + GRBG_BGR888(2, 1, 1) > + /* Second pixel GRBG -> RGGB */ > + RGGB_BGR888(1, 1, 1) > + /* Same thing for third and fourth pixels */ > + GRBG_BGR888(1, 1, 1) > + RGGB_BGR888(1, 2, 1) > + /* Skip 5th src byte with 4 x 2 least-significant-bits */ > + x++; > + } > +} > + > +void DebayerCpu::debayer10P_GBGB_BGR888(uint8_t *dst, const uint8_t *src[]) > +{ > + const int width_in_bytes = window_.width * 5 / 4; > + const uint8_t *prev = (const uint8_t *)src[0]; > + const uint8_t *curr = (const uint8_t *)src[1]; > + const uint8_t *next = (const uint8_t *)src[2]; > + > + for (int x = 0; x < width_in_bytes;) { > + /* Even pixel */ > + GBRG_BGR888(2, 1, 1) > + /* Odd pixel GBGR -> BGGR */ > + BGGR_BGR888(1, 1, 1) > + /* Same thing for next 2 pixels */ > + GBRG_BGR888(1, 1, 1) > + BGGR_BGR888(1, 2, 1) > + /* Skip 5th src byte with 4 x 2 least-significant-bits */ > + x++; > + } > +} > + > +void DebayerCpu::debayer10P_RGRG_BGR888(uint8_t *dst, const uint8_t *src[]) > +{ > + const int width_in_bytes = window_.width * 5 / 4; > + const uint8_t *prev = (const uint8_t *)src[0]; > + const uint8_t *curr = (const uint8_t *)src[1]; > + const uint8_t *next = (const uint8_t *)src[2]; > + > + for (int x = 0; x < width_in_bytes;) { > + /* Even pixel */ > + RGGB_BGR888(2, 1, 1) > + /* Odd pixel RGGB -> GRBG */ > + GRBG_BGR888(1, 1, 1) > + /* Same thing for next 2 pixels */ > + RGGB_BGR888(1, 1, 1) > + GRBG_BGR888(1, 2, 1) > + /* Skip 5th src byte with 4 x 2 least-significant-bits */ > + x++; > + } > +} > + > +static bool isStandardBayerOrder(BayerFormat::Order order) > +{ > + return order == BayerFormat::BGGR || order == BayerFormat::GBRG || > + order == BayerFormat::GRBG || order == BayerFormat::RGGB; > +} > + > +/* > + * Setup the Debayer object according to the passed in parameters. > + * Return 0 on success, a negative errno value on failure > + * (unsupported parameters). > + */ > +int DebayerCpu::getInputConfig(PixelFormat inputFormat, DebayerInputConfig &config) > +{ > + BayerFormat bayerFormat = > + BayerFormat::fromPixelFormat(inputFormat); > + > + if (bayerFormat.bitDepth == 10 && > + bayerFormat.packing == BayerFormat::Packing::CSI2 && > + isStandardBayerOrder(bayerFormat.order)) { > + config.bpp = 10; > + config.patternSize.width = 4; /* 5 bytes per *4* pixels */ > + config.patternSize.height = 2; > + config.outputFormats = std::vector<PixelFormat>({ formats::RGB888 }); > + return 0; > + } > + > + LOG(Debayer, Info) > + << "Unsupported input format " << inputFormat.toString(); > + return -EINVAL; > +} > + > +int DebayerCpu::getOutputConfig(PixelFormat outputFormat, DebayerOutputConfig &config) > +{ > + if (outputFormat == formats::RGB888) { > + config.bpp = 24; > + return 0; > + } > + > + LOG(Debayer, Info) > + << "Unsupported output format " << outputFormat.toString(); > + return -EINVAL; > +} > + > +/* TODO: this ignores outputFormat since there is only 1 supported outputFormat for now */ > +int DebayerCpu::setDebayerFunctions(PixelFormat inputFormat, [[maybe_unused]] PixelFormat outputFormat) > +{ > + BayerFormat bayerFormat = > + BayerFormat::fromPixelFormat(inputFormat); > + > + if (bayerFormat.bitDepth == 10 && > + bayerFormat.packing == BayerFormat::Packing::CSI2) { > + switch (bayerFormat.order) { > + case BayerFormat::BGGR: > + debayer0_ = &DebayerCpu::debayer10P_BGBG_BGR888; > + debayer1_ = &DebayerCpu::debayer10P_GRGR_BGR888; > + return 0; > + case BayerFormat::GBRG: > + debayer0_ = &DebayerCpu::debayer10P_GBGB_BGR888; > + debayer1_ = &DebayerCpu::debayer10P_RGRG_BGR888; > + return 0; > + case BayerFormat::GRBG: > + debayer0_ = &DebayerCpu::debayer10P_GRGR_BGR888; > + debayer1_ = &DebayerCpu::debayer10P_BGBG_BGR888; > + return 0; > + case BayerFormat::RGGB: > + debayer0_ = &DebayerCpu::debayer10P_RGRG_BGR888; > + debayer1_ = &DebayerCpu::debayer10P_GBGB_BGR888; > + return 0; > + default: > + break; > + } > + } > + > + LOG(Debayer, Error) << "Unsupported input output format combination"; > + return -EINVAL; > +} > + > +int DebayerCpu::configure(const StreamConfiguration &inputCfg, > + const std::vector<std::reference_wrapper<StreamConfiguration>> &outputCfgs) > +{ > + if (getInputConfig(inputCfg.pixelFormat, inputConfig_) != 0) > + return -EINVAL; > + > + if (stats_->configure(inputCfg) != 0) > + return -EINVAL; > + > + const Size &stats_pattern_size = stats_->patternSize(); > + if (inputConfig_.patternSize.width != stats_pattern_size.width || > + inputConfig_.patternSize.height != stats_pattern_size.height) { > + LOG(Debayer, Error) > + << "mismatching stats and debayer pattern sizes for " > + << inputCfg.pixelFormat.toString(); > + return -EINVAL; > + } > + > + inputConfig_.stride = inputCfg.stride; > + > + if (outputCfgs.size() != 1) { > + LOG(Debayer, Error) > + << "Unsupported number of output streams: " > + << outputCfgs.size(); > + return -EINVAL; > + } > + > + const StreamConfiguration &outputCfg = outputCfgs[0]; > + SizeRange outSizeRange = sizes(inputCfg.pixelFormat, inputCfg.size); > + std::tie(outputConfig_.stride, outputConfig_.frameSize) = > + strideAndFrameSize(outputCfg.pixelFormat, outputCfg.size); > + > + if (!outSizeRange.contains(outputCfg.size) || outputConfig_.stride != outputCfg.stride) { > + LOG(Debayer, Error) > + << "Invalid output size/stride: " > + << "\n " << outputCfg.size << " (" << outSizeRange << ")" > + << "\n " << outputCfg.stride << " (" << outputConfig_.stride << ")"; > + return -EINVAL; > + } > + > + if (setDebayerFunctions(inputCfg.pixelFormat, outputCfg.pixelFormat) != 0) > + return -EINVAL; > + > + window_.x = ((inputCfg.size.width - outputCfg.size.width) / 2) & > + ~(inputConfig_.patternSize.width - 1); > + window_.y = ((inputCfg.size.height - outputCfg.size.height) / 2) & > + ~(inputConfig_.patternSize.height - 1); > + window_.width = outputCfg.size.width; > + window_.height = outputCfg.size.height; > + > + /* Don't pass x,y since process() already adjusts src before passing it */ > + stats_->setWindow(Rectangle(window_.size())); > + > + /* pad with patternSize.Width on both left and right side */ > + lineBufferPadding_ = inputConfig_.patternSize.width * inputConfig_.bpp / 8; > + lineBufferLength_ = window_.width * inputConfig_.bpp / 8 + > + 2 * lineBufferPadding_; > + for (unsigned int i = 0; > + i < (inputConfig_.patternSize.height + 1) && enableInputMemcpy_; > + i++) { > + free(lineBuffers_[i]); > + lineBuffers_[i] = (uint8_t *)malloc(lineBufferLength_); > + if (!lineBuffers_[i]) > + return -ENOMEM; > + } > + > + measuredFrames_ = 0; > + frameProcessTime_ = 0; > + > + return 0; > +} > + > +/* > + * Get width and height at which the bayer-pattern repeats. > + * Return pattern-size or an empty Size for an unsupported inputFormat. > + */ > +Size DebayerCpu::patternSize(PixelFormat inputFormat) > +{ > + DebayerCpu::DebayerInputConfig config; > + > + if (getInputConfig(inputFormat, config) != 0) > + return {}; > + > + return config.patternSize; > +} > + > +std::vector<PixelFormat> DebayerCpu::formats(PixelFormat inputFormat) > +{ > + DebayerCpu::DebayerInputConfig config; > + > + if (getInputConfig(inputFormat, config) != 0) > + return std::vector<PixelFormat>(); > + > + return config.outputFormats; > +} > + > +std::tuple<unsigned int, unsigned int> > +DebayerCpu::strideAndFrameSize(const PixelFormat &outputFormat, const Size &size) > +{ > + DebayerCpu::DebayerOutputConfig config; > + > + if (getOutputConfig(outputFormat, config) != 0) > + return std::make_tuple(0, 0); > + > + /* round up to multiple of 8 for 64 bits alignment */ > + unsigned int stride = (size.width * config.bpp / 8 + 7) & ~7; > + > + return std::make_tuple(stride, stride * size.height); > +} > + > +void DebayerCpu::setupInputMemcpy(const uint8_t *linePointers[]) > +{ > + const unsigned int patternHeight = inputConfig_.patternSize.height; > + > + if (!enableInputMemcpy_) > + return; > + > + for (unsigned int i = 0; i < patternHeight; i++) { > + memcpy(lineBuffers_[i], linePointers[i + 1] - lineBufferPadding_, > + lineBufferLength_); > + linePointers[i + 1] = lineBuffers_[i] + lineBufferPadding_; > + } > + > + /* Point lineBufferIndex_ to first unused lineBuffer */ > + lineBufferIndex_ = patternHeight; > +} > + > +void DebayerCpu::shiftLinePointers(const uint8_t *linePointers[], const uint8_t *src) > +{ > + const unsigned int patternHeight = inputConfig_.patternSize.height; > + > + for (unsigned int i = 0; i < patternHeight; i++) > + linePointers[i] = linePointers[i + 1]; > + > + linePointers[patternHeight] = src + > + (patternHeight / 2) * (int)inputConfig_.stride; > +} > + > +void DebayerCpu::memcpyNextLine(const uint8_t *linePointers[]) > +{ > + const unsigned int patternHeight = inputConfig_.patternSize.height; > + > + if (!enableInputMemcpy_) > + return; > + > + memcpy(lineBuffers_[lineBufferIndex_], linePointers[patternHeight] - lineBufferPadding_, > + lineBufferLength_); > + linePointers[patternHeight] = lineBuffers_[lineBufferIndex_] + lineBufferPadding_; > + > + lineBufferIndex_ = (lineBufferIndex_ + 1) % (patternHeight + 1); > +} > + > +void DebayerCpu::process2(const uint8_t *src, uint8_t *dst) > +{ > + unsigned int y_end = window_.y + window_.height; > + /* Holds [0] previous- [1] current- [2] next-line */ > + const uint8_t *linePointers[3]; > + > + /* Adjust src to top left corner of the window */ > + src += window_.y * inputConfig_.stride + window_.x * inputConfig_.bpp / 8; > + > + /* [x] becomes [x - 1] after initial shiftLinePointers() call */ > + if (window_.y) { > + linePointers[1] = src - inputConfig_.stride; /* previous-line */ > + linePointers[2] = src; > + } else { > + /* window_.y == 0, use the next line as prev line */ > + linePointers[1] = src + inputConfig_.stride; > + linePointers[2] = src; > + /* Last 2 lines also need special handling */ > + y_end -= 2; > + } > + > + setupInputMemcpy(linePointers); > + > + for (unsigned int y = window_.y; y < y_end; y += 2) { > + shiftLinePointers(linePointers, src); > + memcpyNextLine(linePointers); > + stats_->processLine0(y, linePointers); > + (this->*debayer0_)(dst, linePointers); > + src += inputConfig_.stride; > + dst += outputConfig_.stride; > + > + shiftLinePointers(linePointers, src); > + memcpyNextLine(linePointers); > + (this->*debayer1_)(dst, linePointers); > + src += inputConfig_.stride; > + dst += outputConfig_.stride; > + } > + > + if (window_.y == 0) { > + shiftLinePointers(linePointers, src); > + memcpyNextLine(linePointers); > + stats_->processLine0(y_end, linePointers); > + (this->*debayer0_)(dst, linePointers); > + src += inputConfig_.stride; > + dst += outputConfig_.stride; > + > + shiftLinePointers(linePointers, src); > + /* next line may point outside of src, use prev. */ > + linePointers[2] = linePointers[0]; > + (this->*debayer1_)(dst, linePointers); > + src += inputConfig_.stride; > + dst += outputConfig_.stride; > + } > +} > + > +void DebayerCpu::process4(const uint8_t *src, uint8_t *dst) > +{ > + const unsigned int y_end = window_.y + window_.height; > + /* > + * This holds pointers to [0] 2-lines-up [1] 1-line-up [2] current-line > + * [3] 1-line-down [4] 2-lines-down. > + */ > + const uint8_t *linePointers[5]; > + > + /* Adjust src to top left corner of the window */ > + src += window_.y * inputConfig_.stride + window_.x * inputConfig_.bpp / 8; > + > + /* [x] becomes [x - 1] after initial shiftLinePointers() call */ > + linePointers[1] = src - 2 * inputConfig_.stride; > + linePointers[2] = src - inputConfig_.stride; > + linePointers[3] = src; > + linePointers[4] = src + inputConfig_.stride; > + > + setupInputMemcpy(linePointers); > + > + for (unsigned int y = window_.y; y < y_end; y += 4) { > + shiftLinePointers(linePointers, src); > + memcpyNextLine(linePointers); > + stats_->processLine0(y, linePointers); > + (this->*debayer0_)(dst, linePointers); > + src += inputConfig_.stride; > + dst += outputConfig_.stride; > + > + shiftLinePointers(linePointers, src); > + memcpyNextLine(linePointers); > + (this->*debayer1_)(dst, linePointers); > + src += inputConfig_.stride; > + dst += outputConfig_.stride; > + > + shiftLinePointers(linePointers, src); > + memcpyNextLine(linePointers); > + stats_->processLine2(y, linePointers); > + (this->*debayer2_)(dst, linePointers); > + src += inputConfig_.stride; > + dst += outputConfig_.stride; > + > + shiftLinePointers(linePointers, src); > + memcpyNextLine(linePointers); > + (this->*debayer3_)(dst, linePointers); > + src += inputConfig_.stride; > + dst += outputConfig_.stride; > + } > +} > + > +static inline int64_t timeDiff(timespec &after, timespec &before) > +{ > + return (after.tv_sec - before.tv_sec) * 1000000000LL + > + (int64_t)after.tv_nsec - (int64_t)before.tv_nsec; > +} > + > +void DebayerCpu::process(FrameBuffer *input, FrameBuffer *output, DebayerParams params) > +{ > + timespec frameStartTime; > + > + if (measuredFrames_ < DebayerCpu::kLastFrameToMeasure) { > + frameStartTime = {}; > + clock_gettime(CLOCK_MONOTONIC_RAW, &frameStartTime); > + } > + > + /* Apply DebayerParams */ > + if (params.gamma != gamma_correction_) { > + for (unsigned int i = 0; i < kGammaLookupSize; i++) > + gamma_[i] = UINT8_MAX * powf(i / (kGammaLookupSize - 1.0), params.gamma); > + > + gamma_correction_ = params.gamma; > + } > + > + for (unsigned int i = 0; i < kRGBLookupSize; i++) { > + constexpr unsigned int div = > + kRGBLookupSize * DebayerParams::kGain10 / kGammaLookupSize; > + unsigned int idx; > + > + /* Apply gamma after gain! */ > + idx = std::min({ i * params.gainR / div, (kGammaLookupSize - 1) }); > + red_[i] = gamma_[idx]; > + > + idx = std::min({ i * params.gainG / div, (kGammaLookupSize - 1) }); > + green_[i] = gamma_[idx]; > + > + idx = std::min({ i * params.gainB / div, (kGammaLookupSize - 1) }); > + blue_[i] = gamma_[idx]; > + } > + > + /* Copy metadata from the input buffer */ > + FrameMetadata &metadata = output->_d()->metadata(); > + metadata.status = input->metadata().status; > + metadata.sequence = input->metadata().sequence; > + metadata.timestamp = input->metadata().timestamp; > + > + MappedFrameBuffer in(input, MappedFrameBuffer::MapFlag::Read); > + MappedFrameBuffer out(output, MappedFrameBuffer::MapFlag::Write); > + if (!in.isValid() || !out.isValid()) { > + LOG(Debayer, Error) << "mmap-ing buffer(s) failed"; > + metadata.status = FrameMetadata::FrameError; > + return; > + } > + > + stats_->startFrame(); > + > + if (inputConfig_.patternSize.height == 2) > + process2(in.planes()[0].data(), out.planes()[0].data()); > + else > + process4(in.planes()[0].data(), out.planes()[0].data()); > + > + metadata.planes()[0].bytesused = out.planes()[0].size(); > + > + /* Measure before emitting signals */ > + if (measuredFrames_ < DebayerCpu::kLastFrameToMeasure && > + ++measuredFrames_ > DebayerCpu::kFramesToSkip) { > + timespec frameEndTime = {}; > + clock_gettime(CLOCK_MONOTONIC_RAW, &frameEndTime); > + frameProcessTime_ += timeDiff(frameEndTime, frameStartTime); > + if (measuredFrames_ == DebayerCpu::kLastFrameToMeasure) { > + const unsigned int measuredFrames = DebayerCpu::kLastFrameToMeasure - > + DebayerCpu::kFramesToSkip; > + LOG(Debayer, Info) > + << "Processed " << measuredFrames > + << " frames in " << frameProcessTime_ / 1000 << "us, " > + << frameProcessTime_ / (1000 * measuredFrames) > + << " us/frame"; > + } > + } > + > + stats_->finishFrame(); > + outputBufferReady.emit(output); > + inputBufferReady.emit(input); > +} > + > +SizeRange DebayerCpu::sizes(PixelFormat inputFormat, const Size &inputSize) > +{ > + Size pattern_size = patternSize(inputFormat); > + unsigned int border_height = pattern_size.height; > + > + if (pattern_size.isNull()) > + return {}; > + > + /* No need for top/bottom border with a pattern height of 2 */ > + if (pattern_size.height == 2) > + border_height = 0; > + > + /* > + * For debayer interpolation a border is kept around the entire image > + * and the minimum output size is pattern-height x pattern-width. > + */ > + if (inputSize.width < (3 * pattern_size.width) || > + inputSize.height < (2 * border_height + pattern_size.height)) { > + LOG(Debayer, Warning) > + << "Input format size too small: " << inputSize.toString(); > + return {}; > + } > + > + return SizeRange(Size(pattern_size.width, pattern_size.height), > + Size((inputSize.width - 2 * pattern_size.width) & ~(pattern_size.width - 1), > + (inputSize.height - 2 * border_height) & ~(pattern_size.height - 1)), > + pattern_size.width, pattern_size.height); > +} > + > +} /* namespace libcamera */ > diff --git a/src/libcamera/software_isp/debayer_cpu.h b/src/libcamera/software_isp/debayer_cpu.h > new file mode 100644 > index 00000000..8a51ed85 > --- /dev/null > +++ b/src/libcamera/software_isp/debayer_cpu.h > @@ -0,0 +1,143 @@ > +/* SPDX-License-Identifier: LGPL-2.1-or-later */ > +/* > + * Copyright (C) 2023, Linaro Ltd > + * Copyright (C) 2023, Red Hat Inc. > + * > + * Authors: > + * Hans de Goede <hdegoede@redhat.com> > + * > + * debayer_cpu.h - CPU based debayering header > + */ > + > +#pragma once > + > +#include <memory> > +#include <stdint.h> > +#include <vector> > + > +#include <libcamera/base/object.h> > + > +#include "debayer.h" > +#include "swstats_cpu.h" > + > +namespace libcamera { > + > +class DebayerCpu : public Debayer, public Object > +{ > +public: > + DebayerCpu(std::unique_ptr<SwStatsCpu> stats); > + ~DebayerCpu(); > + > + int configure(const StreamConfiguration &inputCfg, > + const std::vector<std::reference_wrapper<StreamConfiguration>> &outputCfgs); > + Size patternSize(PixelFormat inputFormat); > + std::vector<PixelFormat> formats(PixelFormat input); > + std::tuple<unsigned int, unsigned int> > + strideAndFrameSize(const PixelFormat &outputFormat, const Size &size); > + void process(FrameBuffer *input, FrameBuffer *output, DebayerParams params); > + SizeRange sizes(PixelFormat inputFormat, const Size &inputSize); > + > + /** > + * \brief Get the file descriptor for the statistics. > + * > + * \return the file descriptor pointing to the statistics. > + */ > + const SharedFD &getStatsFD() { return stats_->getStatsFD(); } > + > + /** > + * \brief Get the output frame size. > + * > + * \return The output frame size. > + */ > + unsigned int frameSize() { return outputConfig_.frameSize; } > + > +private: > + /** > + * \brief Called to debayer 1 line of Bayer input data to output format > + * \param[out] dst Pointer to the start of the output line to write > + * \param[in] src The input data > + * > + * Input data is an array of (patternSize_.height + 1) src > + * pointers each pointing to a line in the Bayer source. The middle > + * element of the array will point to the actual line being processed. > + * Earlier element(s) will point to the previous line(s) and later > + * element(s) to the next line(s). > + * > + * These functions take an array of src pointers, rather than > + * a single src pointer + a stride for the source, so that when the src > + * is slow uncached memory it can be copied to faster memory before > + * debayering. Debayering a standard 2x2 Bayer pattern requires access > + * to the previous and next src lines for interpolating the missing > + * colors. To allow copying the src lines only once 3 temporary buffers > + * each holding a single line are used, re-using the oldest buffer for > + * the next line and the pointers are swizzled so that: > + * src[0] = previous-line, src[1] = currrent-line, src[2] = next-line. > + * This way the 3 pointers passed to the debayer functions form > + * a sliding window over the src avoiding the need to copy each > + * line more than once. > + * > + * Similarly for bayer patterns which repeat every 4 lines, 5 src > + * pointers are passed holding: src[0] = 2-lines-up, src[1] = 1-line-up > + * src[2] = current-line, src[3] = 1-line-down, src[4] = 2-lines-down. > + */ > + using debayerFn = void (DebayerCpu::*)(uint8_t *dst, const uint8_t *src[]); > + > + /* CSI-2 packed 10-bit raw bayer format (all the 4 orders) */ > + void debayer10P_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]); > + void debayer10P_GRGR_BGR888(uint8_t *dst, const uint8_t *src[]); > + void debayer10P_GBGB_BGR888(uint8_t *dst, const uint8_t *src[]); > + void debayer10P_RGRG_BGR888(uint8_t *dst, const uint8_t *src[]); > + > + struct DebayerInputConfig { > + Size patternSize; > + unsigned int bpp; /* Memory used per pixel, not precision */ > + unsigned int stride; > + std::vector<PixelFormat> outputFormats; > + }; > + > + struct DebayerOutputConfig { > + unsigned int bpp; /* Memory used per pixel, not precision */ > + unsigned int stride; > + unsigned int frameSize; > + }; > + > + int getInputConfig(PixelFormat inputFormat, DebayerInputConfig &config); > + int getOutputConfig(PixelFormat outputFormat, DebayerOutputConfig &config); > + int setDebayerFunctions(PixelFormat inputFormat, PixelFormat outputFormat); > + void setupInputMemcpy(const uint8_t *linePointers[]); > + void shiftLinePointers(const uint8_t *linePointers[], const uint8_t *src); > + void memcpyNextLine(const uint8_t *linePointers[]); > + void process2(const uint8_t *src, uint8_t *dst); > + void process4(const uint8_t *src, uint8_t *dst); > + > + static constexpr unsigned int kGammaLookupSize = 1024; > + static constexpr unsigned int kRGBLookupSize = 256; > + /* Max. supported Bayer pattern height is 4, debayering this requires 5 lines */ > + static constexpr unsigned int kMaxLineBuffers = 5; > + > + std::array<uint8_t, kGammaLookupSize> gamma_; > + std::array<uint8_t, kRGBLookupSize> red_; > + std::array<uint8_t, kRGBLookupSize> green_; > + std::array<uint8_t, kRGBLookupSize> blue_; > + debayerFn debayer0_; > + debayerFn debayer1_; > + debayerFn debayer2_; > + debayerFn debayer3_; > + Rectangle window_; > + DebayerInputConfig inputConfig_; > + DebayerOutputConfig outputConfig_; > + std::unique_ptr<SwStatsCpu> stats_; > + uint8_t *lineBuffers_[kMaxLineBuffers]; > + unsigned int lineBufferLength_; > + unsigned int lineBufferPadding_; > + unsigned int lineBufferIndex_; > + bool enableInputMemcpy_; > + float gamma_correction_; > + unsigned int measuredFrames_; > + int64_t frameProcessTime_; > + /* Skip 30 frames for things to stabilize then measure 30 frames */ > + static constexpr unsigned int kFramesToSkip = 30; > + static constexpr unsigned int kLastFrameToMeasure = 60; > +}; > + > +} /* namespace libcamera */ > diff --git a/src/libcamera/software_isp/meson.build b/src/libcamera/software_isp/meson.build > index 62095f61..71b46539 100644 > --- a/src/libcamera/software_isp/meson.build > +++ b/src/libcamera/software_isp/meson.build > @@ -9,5 +9,6 @@ endif > > libcamera_sources += files([ > 'debayer.cpp', > + 'debayer_cpu.cpp', > 'swstats_cpu.cpp', > ])
diff --git a/src/libcamera/software_isp/debayer_cpu.cpp b/src/libcamera/software_isp/debayer_cpu.cpp new file mode 100644 index 00000000..f932362c --- /dev/null +++ b/src/libcamera/software_isp/debayer_cpu.cpp @@ -0,0 +1,626 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +/* + * Copyright (C) 2023, Linaro Ltd + * Copyright (C) 2023, Red Hat Inc. + * + * Authors: + * Hans de Goede <hdegoede@redhat.com> + * + * debayer_cpu.cpp - CPU based debayering class + */ + +#include "debayer_cpu.h" + +#include <math.h> +#include <stdlib.h> +#include <time.h> + +#include <libcamera/formats.h> + +#include "libcamera/internal/bayer_format.h" +#include "libcamera/internal/framebuffer.h" +#include "libcamera/internal/mapped_framebuffer.h" + +namespace libcamera { + +/** + * \class DebayerCpu + * \brief Class for debayering on the CPU + * + * Implementation for CPU based debayering + */ + +/** + * \brief Constructs a DebayerCpu object. + * \param[in] stats Pointer to the stats object to use. + */ +DebayerCpu::DebayerCpu(std::unique_ptr<SwStatsCpu> stats) + : stats_(std::move(stats)), gamma_correction_(1.0) +{ +#ifdef __x86_64__ + enableInputMemcpy_ = false; +#else + enableInputMemcpy_ = true; +#endif + /* Initialize gamma to 1.0 curve */ + for (unsigned int i = 0; i < kGammaLookupSize; i++) + gamma_[i] = i / (kGammaLookupSize / kRGBLookupSize); + + for (unsigned int i = 0; i < kMaxLineBuffers; i++) + lineBuffers_[i] = nullptr; +} + +DebayerCpu::~DebayerCpu() +{ + for (unsigned int i = 0; i < kMaxLineBuffers; i++) + free(lineBuffers_[i]); +} + +// RGR +// GBG +// RGR +#define BGGR_BGR888(p, n, div) \ + *dst++ = blue_[curr[x] / (div)]; \ + *dst++ = green_[(prev[x] + curr[x - p] + curr[x + n] + next[x]) / (4 * (div))]; \ + *dst++ = red_[(prev[x - p] + prev[x + n] + next[x - p] + next[x + n]) / (4 * (div))]; \ + x++; + +// GBG +// RGR +// GBG +#define GRBG_BGR888(p, n, div) \ + *dst++ = blue_[(prev[x] + next[x]) / (2 * (div))]; \ + *dst++ = green_[curr[x] / (div)]; \ + *dst++ = red_[(curr[x - p] + curr[x + n]) / (2 * (div))]; \ + x++; + +// GRG +// BGB +// GRG +#define GBRG_BGR888(p, n, div) \ + *dst++ = blue_[(curr[x - p] + curr[x + n]) / (2 * (div))]; \ + *dst++ = green_[curr[x] / (div)]; \ + *dst++ = red_[(prev[x] + next[x]) / (2 * (div))]; \ + x++; + +// BGB +// GRG +// BGB +#define RGGB_BGR888(p, n, div) \ + *dst++ = blue_[(prev[x - p] + prev[x + n] + next[x - p] + next[x + n]) / (4 * (div))]; \ + *dst++ = green_[(prev[x] + curr[x - p] + curr[x + n] + next[x]) / (4 * (div))]; \ + *dst++ = red_[curr[x] / (div)]; \ + x++; + +void DebayerCpu::debayer10P_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]) +{ + const int width_in_bytes = window_.width * 5 / 4; + const uint8_t *prev = (const uint8_t *)src[0]; + const uint8_t *curr = (const uint8_t *)src[1]; + const uint8_t *next = (const uint8_t *)src[2]; + + /* + * For the first pixel getting a pixel from the previous column uses + * x - 2 to skip the 5th byte with least-significant bits for 4 pixels. + * Same for last pixel (uses x + 2) and looking at the next column. + */ + for (int x = 0; x < width_in_bytes;) { + /* First pixel */ + BGGR_BGR888(2, 1, 1) + /* Second pixel BGGR -> GBRG */ + GBRG_BGR888(1, 1, 1) + /* Same thing for third and fourth pixels */ + BGGR_BGR888(1, 1, 1) + GBRG_BGR888(1, 2, 1) + /* Skip 5th src byte with 4 x 2 least-significant-bits */ + x++; + } +} + +void DebayerCpu::debayer10P_GRGR_BGR888(uint8_t *dst, const uint8_t *src[]) +{ + const int width_in_bytes = window_.width * 5 / 4; + const uint8_t *prev = (const uint8_t *)src[0]; + const uint8_t *curr = (const uint8_t *)src[1]; + const uint8_t *next = (const uint8_t *)src[2]; + + for (int x = 0; x < width_in_bytes;) { + /* First pixel */ + GRBG_BGR888(2, 1, 1) + /* Second pixel GRBG -> RGGB */ + RGGB_BGR888(1, 1, 1) + /* Same thing for third and fourth pixels */ + GRBG_BGR888(1, 1, 1) + RGGB_BGR888(1, 2, 1) + /* Skip 5th src byte with 4 x 2 least-significant-bits */ + x++; + } +} + +void DebayerCpu::debayer10P_GBGB_BGR888(uint8_t *dst, const uint8_t *src[]) +{ + const int width_in_bytes = window_.width * 5 / 4; + const uint8_t *prev = (const uint8_t *)src[0]; + const uint8_t *curr = (const uint8_t *)src[1]; + const uint8_t *next = (const uint8_t *)src[2]; + + for (int x = 0; x < width_in_bytes;) { + /* Even pixel */ + GBRG_BGR888(2, 1, 1) + /* Odd pixel GBGR -> BGGR */ + BGGR_BGR888(1, 1, 1) + /* Same thing for next 2 pixels */ + GBRG_BGR888(1, 1, 1) + BGGR_BGR888(1, 2, 1) + /* Skip 5th src byte with 4 x 2 least-significant-bits */ + x++; + } +} + +void DebayerCpu::debayer10P_RGRG_BGR888(uint8_t *dst, const uint8_t *src[]) +{ + const int width_in_bytes = window_.width * 5 / 4; + const uint8_t *prev = (const uint8_t *)src[0]; + const uint8_t *curr = (const uint8_t *)src[1]; + const uint8_t *next = (const uint8_t *)src[2]; + + for (int x = 0; x < width_in_bytes;) { + /* Even pixel */ + RGGB_BGR888(2, 1, 1) + /* Odd pixel RGGB -> GRBG */ + GRBG_BGR888(1, 1, 1) + /* Same thing for next 2 pixels */ + RGGB_BGR888(1, 1, 1) + GRBG_BGR888(1, 2, 1) + /* Skip 5th src byte with 4 x 2 least-significant-bits */ + x++; + } +} + +static bool isStandardBayerOrder(BayerFormat::Order order) +{ + return order == BayerFormat::BGGR || order == BayerFormat::GBRG || + order == BayerFormat::GRBG || order == BayerFormat::RGGB; +} + +/* + * Setup the Debayer object according to the passed in parameters. + * Return 0 on success, a negative errno value on failure + * (unsupported parameters). + */ +int DebayerCpu::getInputConfig(PixelFormat inputFormat, DebayerInputConfig &config) +{ + BayerFormat bayerFormat = + BayerFormat::fromPixelFormat(inputFormat); + + if (bayerFormat.bitDepth == 10 && + bayerFormat.packing == BayerFormat::Packing::CSI2 && + isStandardBayerOrder(bayerFormat.order)) { + config.bpp = 10; + config.patternSize.width = 4; /* 5 bytes per *4* pixels */ + config.patternSize.height = 2; + config.outputFormats = std::vector<PixelFormat>({ formats::RGB888 }); + return 0; + } + + LOG(Debayer, Info) + << "Unsupported input format " << inputFormat.toString(); + return -EINVAL; +} + +int DebayerCpu::getOutputConfig(PixelFormat outputFormat, DebayerOutputConfig &config) +{ + if (outputFormat == formats::RGB888) { + config.bpp = 24; + return 0; + } + + LOG(Debayer, Info) + << "Unsupported output format " << outputFormat.toString(); + return -EINVAL; +} + +/* TODO: this ignores outputFormat since there is only 1 supported outputFormat for now */ +int DebayerCpu::setDebayerFunctions(PixelFormat inputFormat, [[maybe_unused]] PixelFormat outputFormat) +{ + BayerFormat bayerFormat = + BayerFormat::fromPixelFormat(inputFormat); + + if (bayerFormat.bitDepth == 10 && + bayerFormat.packing == BayerFormat::Packing::CSI2) { + switch (bayerFormat.order) { + case BayerFormat::BGGR: + debayer0_ = &DebayerCpu::debayer10P_BGBG_BGR888; + debayer1_ = &DebayerCpu::debayer10P_GRGR_BGR888; + return 0; + case BayerFormat::GBRG: + debayer0_ = &DebayerCpu::debayer10P_GBGB_BGR888; + debayer1_ = &DebayerCpu::debayer10P_RGRG_BGR888; + return 0; + case BayerFormat::GRBG: + debayer0_ = &DebayerCpu::debayer10P_GRGR_BGR888; + debayer1_ = &DebayerCpu::debayer10P_BGBG_BGR888; + return 0; + case BayerFormat::RGGB: + debayer0_ = &DebayerCpu::debayer10P_RGRG_BGR888; + debayer1_ = &DebayerCpu::debayer10P_GBGB_BGR888; + return 0; + default: + break; + } + } + + LOG(Debayer, Error) << "Unsupported input output format combination"; + return -EINVAL; +} + +int DebayerCpu::configure(const StreamConfiguration &inputCfg, + const std::vector<std::reference_wrapper<StreamConfiguration>> &outputCfgs) +{ + if (getInputConfig(inputCfg.pixelFormat, inputConfig_) != 0) + return -EINVAL; + + if (stats_->configure(inputCfg) != 0) + return -EINVAL; + + const Size &stats_pattern_size = stats_->patternSize(); + if (inputConfig_.patternSize.width != stats_pattern_size.width || + inputConfig_.patternSize.height != stats_pattern_size.height) { + LOG(Debayer, Error) + << "mismatching stats and debayer pattern sizes for " + << inputCfg.pixelFormat.toString(); + return -EINVAL; + } + + inputConfig_.stride = inputCfg.stride; + + if (outputCfgs.size() != 1) { + LOG(Debayer, Error) + << "Unsupported number of output streams: " + << outputCfgs.size(); + return -EINVAL; + } + + const StreamConfiguration &outputCfg = outputCfgs[0]; + SizeRange outSizeRange = sizes(inputCfg.pixelFormat, inputCfg.size); + std::tie(outputConfig_.stride, outputConfig_.frameSize) = + strideAndFrameSize(outputCfg.pixelFormat, outputCfg.size); + + if (!outSizeRange.contains(outputCfg.size) || outputConfig_.stride != outputCfg.stride) { + LOG(Debayer, Error) + << "Invalid output size/stride: " + << "\n " << outputCfg.size << " (" << outSizeRange << ")" + << "\n " << outputCfg.stride << " (" << outputConfig_.stride << ")"; + return -EINVAL; + } + + if (setDebayerFunctions(inputCfg.pixelFormat, outputCfg.pixelFormat) != 0) + return -EINVAL; + + window_.x = ((inputCfg.size.width - outputCfg.size.width) / 2) & + ~(inputConfig_.patternSize.width - 1); + window_.y = ((inputCfg.size.height - outputCfg.size.height) / 2) & + ~(inputConfig_.patternSize.height - 1); + window_.width = outputCfg.size.width; + window_.height = outputCfg.size.height; + + /* Don't pass x,y since process() already adjusts src before passing it */ + stats_->setWindow(Rectangle(window_.size())); + + /* pad with patternSize.Width on both left and right side */ + lineBufferPadding_ = inputConfig_.patternSize.width * inputConfig_.bpp / 8; + lineBufferLength_ = window_.width * inputConfig_.bpp / 8 + + 2 * lineBufferPadding_; + for (unsigned int i = 0; + i < (inputConfig_.patternSize.height + 1) && enableInputMemcpy_; + i++) { + free(lineBuffers_[i]); + lineBuffers_[i] = (uint8_t *)malloc(lineBufferLength_); + if (!lineBuffers_[i]) + return -ENOMEM; + } + + measuredFrames_ = 0; + frameProcessTime_ = 0; + + return 0; +} + +/* + * Get width and height at which the bayer-pattern repeats. + * Return pattern-size or an empty Size for an unsupported inputFormat. + */ +Size DebayerCpu::patternSize(PixelFormat inputFormat) +{ + DebayerCpu::DebayerInputConfig config; + + if (getInputConfig(inputFormat, config) != 0) + return {}; + + return config.patternSize; +} + +std::vector<PixelFormat> DebayerCpu::formats(PixelFormat inputFormat) +{ + DebayerCpu::DebayerInputConfig config; + + if (getInputConfig(inputFormat, config) != 0) + return std::vector<PixelFormat>(); + + return config.outputFormats; +} + +std::tuple<unsigned int, unsigned int> +DebayerCpu::strideAndFrameSize(const PixelFormat &outputFormat, const Size &size) +{ + DebayerCpu::DebayerOutputConfig config; + + if (getOutputConfig(outputFormat, config) != 0) + return std::make_tuple(0, 0); + + /* round up to multiple of 8 for 64 bits alignment */ + unsigned int stride = (size.width * config.bpp / 8 + 7) & ~7; + + return std::make_tuple(stride, stride * size.height); +} + +void DebayerCpu::setupInputMemcpy(const uint8_t *linePointers[]) +{ + const unsigned int patternHeight = inputConfig_.patternSize.height; + + if (!enableInputMemcpy_) + return; + + for (unsigned int i = 0; i < patternHeight; i++) { + memcpy(lineBuffers_[i], linePointers[i + 1] - lineBufferPadding_, + lineBufferLength_); + linePointers[i + 1] = lineBuffers_[i] + lineBufferPadding_; + } + + /* Point lineBufferIndex_ to first unused lineBuffer */ + lineBufferIndex_ = patternHeight; +} + +void DebayerCpu::shiftLinePointers(const uint8_t *linePointers[], const uint8_t *src) +{ + const unsigned int patternHeight = inputConfig_.patternSize.height; + + for (unsigned int i = 0; i < patternHeight; i++) + linePointers[i] = linePointers[i + 1]; + + linePointers[patternHeight] = src + + (patternHeight / 2) * (int)inputConfig_.stride; +} + +void DebayerCpu::memcpyNextLine(const uint8_t *linePointers[]) +{ + const unsigned int patternHeight = inputConfig_.patternSize.height; + + if (!enableInputMemcpy_) + return; + + memcpy(lineBuffers_[lineBufferIndex_], linePointers[patternHeight] - lineBufferPadding_, + lineBufferLength_); + linePointers[patternHeight] = lineBuffers_[lineBufferIndex_] + lineBufferPadding_; + + lineBufferIndex_ = (lineBufferIndex_ + 1) % (patternHeight + 1); +} + +void DebayerCpu::process2(const uint8_t *src, uint8_t *dst) +{ + unsigned int y_end = window_.y + window_.height; + /* Holds [0] previous- [1] current- [2] next-line */ + const uint8_t *linePointers[3]; + + /* Adjust src to top left corner of the window */ + src += window_.y * inputConfig_.stride + window_.x * inputConfig_.bpp / 8; + + /* [x] becomes [x - 1] after initial shiftLinePointers() call */ + if (window_.y) { + linePointers[1] = src - inputConfig_.stride; /* previous-line */ + linePointers[2] = src; + } else { + /* window_.y == 0, use the next line as prev line */ + linePointers[1] = src + inputConfig_.stride; + linePointers[2] = src; + /* Last 2 lines also need special handling */ + y_end -= 2; + } + + setupInputMemcpy(linePointers); + + for (unsigned int y = window_.y; y < y_end; y += 2) { + shiftLinePointers(linePointers, src); + memcpyNextLine(linePointers); + stats_->processLine0(y, linePointers); + (this->*debayer0_)(dst, linePointers); + src += inputConfig_.stride; + dst += outputConfig_.stride; + + shiftLinePointers(linePointers, src); + memcpyNextLine(linePointers); + (this->*debayer1_)(dst, linePointers); + src += inputConfig_.stride; + dst += outputConfig_.stride; + } + + if (window_.y == 0) { + shiftLinePointers(linePointers, src); + memcpyNextLine(linePointers); + stats_->processLine0(y_end, linePointers); + (this->*debayer0_)(dst, linePointers); + src += inputConfig_.stride; + dst += outputConfig_.stride; + + shiftLinePointers(linePointers, src); + /* next line may point outside of src, use prev. */ + linePointers[2] = linePointers[0]; + (this->*debayer1_)(dst, linePointers); + src += inputConfig_.stride; + dst += outputConfig_.stride; + } +} + +void DebayerCpu::process4(const uint8_t *src, uint8_t *dst) +{ + const unsigned int y_end = window_.y + window_.height; + /* + * This holds pointers to [0] 2-lines-up [1] 1-line-up [2] current-line + * [3] 1-line-down [4] 2-lines-down. + */ + const uint8_t *linePointers[5]; + + /* Adjust src to top left corner of the window */ + src += window_.y * inputConfig_.stride + window_.x * inputConfig_.bpp / 8; + + /* [x] becomes [x - 1] after initial shiftLinePointers() call */ + linePointers[1] = src - 2 * inputConfig_.stride; + linePointers[2] = src - inputConfig_.stride; + linePointers[3] = src; + linePointers[4] = src + inputConfig_.stride; + + setupInputMemcpy(linePointers); + + for (unsigned int y = window_.y; y < y_end; y += 4) { + shiftLinePointers(linePointers, src); + memcpyNextLine(linePointers); + stats_->processLine0(y, linePointers); + (this->*debayer0_)(dst, linePointers); + src += inputConfig_.stride; + dst += outputConfig_.stride; + + shiftLinePointers(linePointers, src); + memcpyNextLine(linePointers); + (this->*debayer1_)(dst, linePointers); + src += inputConfig_.stride; + dst += outputConfig_.stride; + + shiftLinePointers(linePointers, src); + memcpyNextLine(linePointers); + stats_->processLine2(y, linePointers); + (this->*debayer2_)(dst, linePointers); + src += inputConfig_.stride; + dst += outputConfig_.stride; + + shiftLinePointers(linePointers, src); + memcpyNextLine(linePointers); + (this->*debayer3_)(dst, linePointers); + src += inputConfig_.stride; + dst += outputConfig_.stride; + } +} + +static inline int64_t timeDiff(timespec &after, timespec &before) +{ + return (after.tv_sec - before.tv_sec) * 1000000000LL + + (int64_t)after.tv_nsec - (int64_t)before.tv_nsec; +} + +void DebayerCpu::process(FrameBuffer *input, FrameBuffer *output, DebayerParams params) +{ + timespec frameStartTime; + + if (measuredFrames_ < DebayerCpu::kLastFrameToMeasure) { + frameStartTime = {}; + clock_gettime(CLOCK_MONOTONIC_RAW, &frameStartTime); + } + + /* Apply DebayerParams */ + if (params.gamma != gamma_correction_) { + for (unsigned int i = 0; i < kGammaLookupSize; i++) + gamma_[i] = UINT8_MAX * powf(i / (kGammaLookupSize - 1.0), params.gamma); + + gamma_correction_ = params.gamma; + } + + for (unsigned int i = 0; i < kRGBLookupSize; i++) { + constexpr unsigned int div = + kRGBLookupSize * DebayerParams::kGain10 / kGammaLookupSize; + unsigned int idx; + + /* Apply gamma after gain! */ + idx = std::min({ i * params.gainR / div, (kGammaLookupSize - 1) }); + red_[i] = gamma_[idx]; + + idx = std::min({ i * params.gainG / div, (kGammaLookupSize - 1) }); + green_[i] = gamma_[idx]; + + idx = std::min({ i * params.gainB / div, (kGammaLookupSize - 1) }); + blue_[i] = gamma_[idx]; + } + + /* Copy metadata from the input buffer */ + FrameMetadata &metadata = output->_d()->metadata(); + metadata.status = input->metadata().status; + metadata.sequence = input->metadata().sequence; + metadata.timestamp = input->metadata().timestamp; + + MappedFrameBuffer in(input, MappedFrameBuffer::MapFlag::Read); + MappedFrameBuffer out(output, MappedFrameBuffer::MapFlag::Write); + if (!in.isValid() || !out.isValid()) { + LOG(Debayer, Error) << "mmap-ing buffer(s) failed"; + metadata.status = FrameMetadata::FrameError; + return; + } + + stats_->startFrame(); + + if (inputConfig_.patternSize.height == 2) + process2(in.planes()[0].data(), out.planes()[0].data()); + else + process4(in.planes()[0].data(), out.planes()[0].data()); + + metadata.planes()[0].bytesused = out.planes()[0].size(); + + /* Measure before emitting signals */ + if (measuredFrames_ < DebayerCpu::kLastFrameToMeasure && + ++measuredFrames_ > DebayerCpu::kFramesToSkip) { + timespec frameEndTime = {}; + clock_gettime(CLOCK_MONOTONIC_RAW, &frameEndTime); + frameProcessTime_ += timeDiff(frameEndTime, frameStartTime); + if (measuredFrames_ == DebayerCpu::kLastFrameToMeasure) { + const unsigned int measuredFrames = DebayerCpu::kLastFrameToMeasure - + DebayerCpu::kFramesToSkip; + LOG(Debayer, Info) + << "Processed " << measuredFrames + << " frames in " << frameProcessTime_ / 1000 << "us, " + << frameProcessTime_ / (1000 * measuredFrames) + << " us/frame"; + } + } + + stats_->finishFrame(); + outputBufferReady.emit(output); + inputBufferReady.emit(input); +} + +SizeRange DebayerCpu::sizes(PixelFormat inputFormat, const Size &inputSize) +{ + Size pattern_size = patternSize(inputFormat); + unsigned int border_height = pattern_size.height; + + if (pattern_size.isNull()) + return {}; + + /* No need for top/bottom border with a pattern height of 2 */ + if (pattern_size.height == 2) + border_height = 0; + + /* + * For debayer interpolation a border is kept around the entire image + * and the minimum output size is pattern-height x pattern-width. + */ + if (inputSize.width < (3 * pattern_size.width) || + inputSize.height < (2 * border_height + pattern_size.height)) { + LOG(Debayer, Warning) + << "Input format size too small: " << inputSize.toString(); + return {}; + } + + return SizeRange(Size(pattern_size.width, pattern_size.height), + Size((inputSize.width - 2 * pattern_size.width) & ~(pattern_size.width - 1), + (inputSize.height - 2 * border_height) & ~(pattern_size.height - 1)), + pattern_size.width, pattern_size.height); +} + +} /* namespace libcamera */ diff --git a/src/libcamera/software_isp/debayer_cpu.h b/src/libcamera/software_isp/debayer_cpu.h new file mode 100644 index 00000000..8a51ed85 --- /dev/null +++ b/src/libcamera/software_isp/debayer_cpu.h @@ -0,0 +1,143 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +/* + * Copyright (C) 2023, Linaro Ltd + * Copyright (C) 2023, Red Hat Inc. + * + * Authors: + * Hans de Goede <hdegoede@redhat.com> + * + * debayer_cpu.h - CPU based debayering header + */ + +#pragma once + +#include <memory> +#include <stdint.h> +#include <vector> + +#include <libcamera/base/object.h> + +#include "debayer.h" +#include "swstats_cpu.h" + +namespace libcamera { + +class DebayerCpu : public Debayer, public Object +{ +public: + DebayerCpu(std::unique_ptr<SwStatsCpu> stats); + ~DebayerCpu(); + + int configure(const StreamConfiguration &inputCfg, + const std::vector<std::reference_wrapper<StreamConfiguration>> &outputCfgs); + Size patternSize(PixelFormat inputFormat); + std::vector<PixelFormat> formats(PixelFormat input); + std::tuple<unsigned int, unsigned int> + strideAndFrameSize(const PixelFormat &outputFormat, const Size &size); + void process(FrameBuffer *input, FrameBuffer *output, DebayerParams params); + SizeRange sizes(PixelFormat inputFormat, const Size &inputSize); + + /** + * \brief Get the file descriptor for the statistics. + * + * \return the file descriptor pointing to the statistics. + */ + const SharedFD &getStatsFD() { return stats_->getStatsFD(); } + + /** + * \brief Get the output frame size. + * + * \return The output frame size. + */ + unsigned int frameSize() { return outputConfig_.frameSize; } + +private: + /** + * \brief Called to debayer 1 line of Bayer input data to output format + * \param[out] dst Pointer to the start of the output line to write + * \param[in] src The input data + * + * Input data is an array of (patternSize_.height + 1) src + * pointers each pointing to a line in the Bayer source. The middle + * element of the array will point to the actual line being processed. + * Earlier element(s) will point to the previous line(s) and later + * element(s) to the next line(s). + * + * These functions take an array of src pointers, rather than + * a single src pointer + a stride for the source, so that when the src + * is slow uncached memory it can be copied to faster memory before + * debayering. Debayering a standard 2x2 Bayer pattern requires access + * to the previous and next src lines for interpolating the missing + * colors. To allow copying the src lines only once 3 temporary buffers + * each holding a single line are used, re-using the oldest buffer for + * the next line and the pointers are swizzled so that: + * src[0] = previous-line, src[1] = currrent-line, src[2] = next-line. + * This way the 3 pointers passed to the debayer functions form + * a sliding window over the src avoiding the need to copy each + * line more than once. + * + * Similarly for bayer patterns which repeat every 4 lines, 5 src + * pointers are passed holding: src[0] = 2-lines-up, src[1] = 1-line-up + * src[2] = current-line, src[3] = 1-line-down, src[4] = 2-lines-down. + */ + using debayerFn = void (DebayerCpu::*)(uint8_t *dst, const uint8_t *src[]); + + /* CSI-2 packed 10-bit raw bayer format (all the 4 orders) */ + void debayer10P_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]); + void debayer10P_GRGR_BGR888(uint8_t *dst, const uint8_t *src[]); + void debayer10P_GBGB_BGR888(uint8_t *dst, const uint8_t *src[]); + void debayer10P_RGRG_BGR888(uint8_t *dst, const uint8_t *src[]); + + struct DebayerInputConfig { + Size patternSize; + unsigned int bpp; /* Memory used per pixel, not precision */ + unsigned int stride; + std::vector<PixelFormat> outputFormats; + }; + + struct DebayerOutputConfig { + unsigned int bpp; /* Memory used per pixel, not precision */ + unsigned int stride; + unsigned int frameSize; + }; + + int getInputConfig(PixelFormat inputFormat, DebayerInputConfig &config); + int getOutputConfig(PixelFormat outputFormat, DebayerOutputConfig &config); + int setDebayerFunctions(PixelFormat inputFormat, PixelFormat outputFormat); + void setupInputMemcpy(const uint8_t *linePointers[]); + void shiftLinePointers(const uint8_t *linePointers[], const uint8_t *src); + void memcpyNextLine(const uint8_t *linePointers[]); + void process2(const uint8_t *src, uint8_t *dst); + void process4(const uint8_t *src, uint8_t *dst); + + static constexpr unsigned int kGammaLookupSize = 1024; + static constexpr unsigned int kRGBLookupSize = 256; + /* Max. supported Bayer pattern height is 4, debayering this requires 5 lines */ + static constexpr unsigned int kMaxLineBuffers = 5; + + std::array<uint8_t, kGammaLookupSize> gamma_; + std::array<uint8_t, kRGBLookupSize> red_; + std::array<uint8_t, kRGBLookupSize> green_; + std::array<uint8_t, kRGBLookupSize> blue_; + debayerFn debayer0_; + debayerFn debayer1_; + debayerFn debayer2_; + debayerFn debayer3_; + Rectangle window_; + DebayerInputConfig inputConfig_; + DebayerOutputConfig outputConfig_; + std::unique_ptr<SwStatsCpu> stats_; + uint8_t *lineBuffers_[kMaxLineBuffers]; + unsigned int lineBufferLength_; + unsigned int lineBufferPadding_; + unsigned int lineBufferIndex_; + bool enableInputMemcpy_; + float gamma_correction_; + unsigned int measuredFrames_; + int64_t frameProcessTime_; + /* Skip 30 frames for things to stabilize then measure 30 frames */ + static constexpr unsigned int kFramesToSkip = 30; + static constexpr unsigned int kLastFrameToMeasure = 60; +}; + +} /* namespace libcamera */ diff --git a/src/libcamera/software_isp/meson.build b/src/libcamera/software_isp/meson.build index 62095f61..71b46539 100644 --- a/src/libcamera/software_isp/meson.build +++ b/src/libcamera/software_isp/meson.build @@ -9,5 +9,6 @@ endif libcamera_sources += files([ 'debayer.cpp', + 'debayer_cpu.cpp', 'swstats_cpu.cpp', ])