@@ -53,11 +53,11 @@ public:
int configure(const StreamConfiguration &inputCfg);
void setWindow(const Rectangle &window);
- void startFrame(uint32_t frame);
- void finishFrame(uint32_t frame, uint32_t bufferId);
+ void startFrame(uint32_t frame, struct SwIspStats statsBuffer[], unsigned int statsBufferCount);
+ void finishFrame(uint32_t frame, uint32_t bufferId, struct SwIspStats statsBuffer[], unsigned int statsBufferCount);
void processFrame(uint32_t frame, uint32_t bufferId, FrameBuffer *input);
- void processLine0(uint32_t frame, unsigned int y, const uint8_t *src[])
+ void processLine0(uint32_t frame, unsigned int y, const uint8_t *src[], SwIspStats *stats)
{
if (frame % kStatPerNumFrames)
return;
@@ -66,10 +66,10 @@ public:
y >= (window_.y + window_.height))
return;
- (this->*stats0_)(src);
+ (this->*stats0_)(src, stats);
}
- void processLine2(uint32_t frame, unsigned int y, const uint8_t *src[])
+ void processLine2(uint32_t frame, unsigned int y, const uint8_t *src[], SwIspStats *stats)
{
if (frame % kStatPerNumFrames)
return;
@@ -78,27 +78,27 @@ public:
y >= (window_.y + window_.height))
return;
- (this->*stats2_)(src);
+ (this->*stats2_)(src, stats);
}
Signal<uint32_t, uint32_t> statsReady;
private:
- using statsProcessFn = void (SwStatsCpu::*)(const uint8_t *src[]);
- using processFrameFn = void (SwStatsCpu::*)(MappedFrameBuffer &in);
+ using statsProcessFn = void (SwStatsCpu::*)(const uint8_t *src[], SwIspStats *stats);
+ using processFrameFn = void (SwStatsCpu::*)(MappedFrameBuffer &in, SwIspStats *stats);
int setupStandardBayerOrder(BayerFormat::Order order);
/* Bayer 8 bpp unpacked */
- void statsBGGR8Line0(const uint8_t *src[]);
+ void statsBGGR8Line0(const uint8_t *src[], SwIspStats *stats);
/* Bayer 10 bpp unpacked */
- void statsBGGR10Line0(const uint8_t *src[]);
+ void statsBGGR10Line0(const uint8_t *src[], SwIspStats *stats);
/* Bayer 12 bpp unpacked */
- void statsBGGR12Line0(const uint8_t *src[]);
+ void statsBGGR12Line0(const uint8_t *src[], SwIspStats *stats);
/* Bayer 10 bpp packed */
- void statsBGGR10PLine0(const uint8_t *src[]);
- void statsGBRG10PLine0(const uint8_t *src[]);
+ void statsBGGR10PLine0(const uint8_t *src[], SwIspStats *stats);
+ void statsGBRG10PLine0(const uint8_t *src[], SwIspStats *stats);
- void processBayerFrame2(MappedFrameBuffer &in);
+ void processBayerFrame2(MappedFrameBuffer &in, SwIspStats *stats);
processFrameFn processFrame_;
@@ -117,7 +117,6 @@ private:
unsigned int stride_;
SharedMemObject<SwIspStats> sharedStats_;
- SwIspStats stats_;
Benchmark bench_;
};
@@ -673,7 +673,7 @@ void DebayerCpu::process2(uint32_t frame, const uint8_t *src, uint8_t *dst)
for (unsigned int y = 0; y < yEnd; y += 2) {
shiftLinePointers(linePointers, src);
memcpyNextLine(linePointers);
- stats_->processLine0(frame, y, linePointers);
+ stats_->processLine0(frame, y, linePointers, &statsBuffer_);
(this->*debayer0_)(dst, linePointers);
src += inputConfig_.stride;
dst += outputConfig_.stride;
@@ -688,7 +688,7 @@ void DebayerCpu::process2(uint32_t frame, const uint8_t *src, uint8_t *dst)
if (window_.y == 0) {
shiftLinePointers(linePointers, src);
memcpyNextLine(linePointers);
- stats_->processLine0(frame, yEnd, linePointers);
+ stats_->processLine0(frame, yEnd, linePointers, &statsBuffer_);
(this->*debayer0_)(dst, linePointers);
src += inputConfig_.stride;
dst += outputConfig_.stride;
@@ -724,7 +724,7 @@ void DebayerCpu::process4(uint32_t frame, const uint8_t *src, uint8_t *dst)
for (unsigned int y = 0; y < window_.height; y += 4) {
shiftLinePointers(linePointers, src);
memcpyNextLine(linePointers);
- stats_->processLine0(frame, y, linePointers);
+ stats_->processLine0(frame, y, linePointers, &statsBuffer_);
(this->*debayer0_)(dst, linePointers);
src += inputConfig_.stride;
dst += outputConfig_.stride;
@@ -737,7 +737,7 @@ void DebayerCpu::process4(uint32_t frame, const uint8_t *src, uint8_t *dst)
shiftLinePointers(linePointers, src);
memcpyNextLine(linePointers);
- stats_->processLine2(frame, y, linePointers);
+ stats_->processLine2(frame, y, linePointers, &statsBuffer_);
(this->*debayer2_)(dst, linePointers);
src += inputConfig_.stride;
dst += outputConfig_.stride;
@@ -866,7 +866,7 @@ void DebayerCpu::process(uint32_t frame, FrameBuffer *input, FrameBuffer *output
return;
}
- stats_->startFrame(frame);
+ stats_->startFrame(frame, &statsBuffer_, 1);
if (inputConfig_.patternSize.height == 2)
process2(frame, in.planes()[0].data(), out.planes()[0].data());
@@ -885,7 +885,7 @@ void DebayerCpu::process(uint32_t frame, FrameBuffer *input, FrameBuffer *output
*
* \todo Pass real bufferId once stats buffer passing is changed.
*/
- stats_->finishFrame(frame, 0);
+ stats_->finishFrame(frame, 0, &statsBuffer_, 1);
outputBufferReady.emit(output);
inputBufferReady.emit(input);
}
@@ -135,6 +135,7 @@ private:
LookupTable gammaLut_;
bool ccmEnabled_;
DebayerParams params_;
+ SwIspStats statsBuffer_;
debayerFn debayer0_;
debayerFn debayer1_;
@@ -182,14 +182,14 @@ static constexpr unsigned int kBlueYMul = 29; /* 0.114 * 256 */
yVal = r * kRedYMul; \
yVal += g * kGreenYMul; \
yVal += b * kBlueYMul; \
- stats_.yHistogram[yVal * SwIspStats::kYHistogramSize / (256 * 256 * (div))]++;
+ stats->yHistogram[yVal * SwIspStats::kYHistogramSize / (256 * 256 * (div))]++;
#define SWSTATS_FINISH_LINE_STATS() \
- stats_.sum_.r() += sumR; \
- stats_.sum_.g() += sumG; \
- stats_.sum_.b() += sumB;
+ stats->sum_.r() += sumR; \
+ stats->sum_.g() += sumG; \
+ stats->sum_.b() += sumB;
-void SwStatsCpu::statsBGGR8Line0(const uint8_t *src[])
+void SwStatsCpu::statsBGGR8Line0(const uint8_t *src[], SwIspStats *stats)
{
const uint8_t *src0 = src[1] + window_.x;
const uint8_t *src1 = src[2] + window_.x;
@@ -214,7 +214,7 @@ void SwStatsCpu::statsBGGR8Line0(const uint8_t *src[])
SWSTATS_FINISH_LINE_STATS()
}
-void SwStatsCpu::statsBGGR10Line0(const uint8_t *src[])
+void SwStatsCpu::statsBGGR10Line0(const uint8_t *src[], SwIspStats *stats)
{
const uint16_t *src0 = (const uint16_t *)src[1] + window_.x;
const uint16_t *src1 = (const uint16_t *)src[2] + window_.x;
@@ -240,7 +240,7 @@ void SwStatsCpu::statsBGGR10Line0(const uint8_t *src[])
SWSTATS_FINISH_LINE_STATS()
}
-void SwStatsCpu::statsBGGR12Line0(const uint8_t *src[])
+void SwStatsCpu::statsBGGR12Line0(const uint8_t *src[], SwIspStats *stats)
{
const uint16_t *src0 = (const uint16_t *)src[1] + window_.x;
const uint16_t *src1 = (const uint16_t *)src[2] + window_.x;
@@ -266,7 +266,7 @@ void SwStatsCpu::statsBGGR12Line0(const uint8_t *src[])
SWSTATS_FINISH_LINE_STATS()
}
-void SwStatsCpu::statsBGGR10PLine0(const uint8_t *src[])
+void SwStatsCpu::statsBGGR10PLine0(const uint8_t *src[], SwIspStats *stats)
{
const uint8_t *src0 = src[1] + window_.x * 5 / 4;
const uint8_t *src1 = src[2] + window_.x * 5 / 4;
@@ -292,7 +292,7 @@ void SwStatsCpu::statsBGGR10PLine0(const uint8_t *src[])
SWSTATS_FINISH_LINE_STATS()
}
-void SwStatsCpu::statsGBRG10PLine0(const uint8_t *src[])
+void SwStatsCpu::statsGBRG10PLine0(const uint8_t *src[], SwIspStats *stats)
{
const uint8_t *src0 = src[1] + window_.x * 5 / 4;
const uint8_t *src1 = src[2] + window_.x * 5 / 4;
@@ -321,10 +321,13 @@ void SwStatsCpu::statsGBRG10PLine0(const uint8_t *src[])
/**
* \brief Reset state to start statistics gathering for a new frame
* \param[in] frame The frame number
+ * \param[in] statsBuffer Array of buffers storing stats
+ * \param[in] statsBufferCount number of buffers in the statsBuffer array
*
* This may only be called after a successful setWindow() call.
*/
-void SwStatsCpu::startFrame(uint32_t frame)
+void SwStatsCpu::startFrame(uint32_t frame,
+ struct SwIspStats statsBuffer[], unsigned int statsBufferCount)
{
if (frame % kStatPerNumFrames)
return;
@@ -332,21 +335,39 @@ void SwStatsCpu::startFrame(uint32_t frame)
if (window_.width == 0)
LOG(SwStatsCpu, Error) << "Calling startFrame() without setWindow()";
- stats_.sum_ = RGB<uint64_t>({ 0, 0, 0 });
- stats_.yHistogram.fill(0);
+ for (unsigned int i = 0; i < statsBufferCount; i++) {
+ statsBuffer[i].sum_ = RGB<uint64_t>({ 0, 0, 0 });
+ statsBuffer[i].yHistogram.fill(0);
+ }
}
/**
* \brief Finish statistics calculation for the current frame
* \param[in] frame The frame number
* \param[in] bufferId ID of the statistics buffer
+ * \param[in] statsBuffer Array of buffers storing stats
+ * \param[in] statsBufferCount number of buffers in the statsBuffer array
*
* This may only be called after a successful setWindow() call.
*/
-void SwStatsCpu::finishFrame(uint32_t frame, uint32_t bufferId)
+void SwStatsCpu::finishFrame(uint32_t frame, uint32_t bufferId,
+ struct SwIspStats statsBuffer[], unsigned int statsBufferCount)
{
- stats_.valid = frame % kStatPerNumFrames == 0;
- *sharedStats_ = stats_;
+ if (frame % kStatPerNumFrames) {
+ sharedStats_->valid = false;
+ statsReady.emit(frame, bufferId);
+ return;
+ }
+
+ sharedStats_->sum_ = RGB<uint64_t>({ 0, 0, 0 });
+ sharedStats_->yHistogram.fill(0);
+ for (unsigned int i = 0; i < statsBufferCount; i++) {
+ sharedStats_->sum_ += statsBuffer[i].sum_;
+ for (unsigned int j = 0; j < SwIspStats::kYHistogramSize; j++)
+ sharedStats_->yHistogram[j] += statsBuffer[i].yHistogram[j];
+ }
+
+ sharedStats_->valid = true;
statsReady.emit(frame, bufferId);
}
@@ -487,7 +508,7 @@ void SwStatsCpu::setWindow(const Rectangle &window)
window_.height &= ~(patternSize_.height - 1);
}
-void SwStatsCpu::processBayerFrame2(MappedFrameBuffer &in)
+void SwStatsCpu::processBayerFrame2(MappedFrameBuffer &in, SwIspStats *stats)
{
const uint8_t *src = in.planes()[0].data();
const uint8_t *linePointers[3];
@@ -504,7 +525,7 @@ void SwStatsCpu::processBayerFrame2(MappedFrameBuffer &in)
/* linePointers[0] is not used by any stats0_ functions */
linePointers[1] = src;
linePointers[2] = src + stride_;
- (this->*stats0_)(linePointers);
+ (this->*stats0_)(linePointers, stats);
src += stride_ * 2;
}
}
@@ -520,12 +541,14 @@ void SwStatsCpu::processBayerFrame2(MappedFrameBuffer &in)
void SwStatsCpu::processFrame(uint32_t frame, uint32_t bufferId, FrameBuffer *input)
{
if (frame % kStatPerNumFrames) {
- finishFrame(frame, bufferId);
+ finishFrame(frame, bufferId, NULL, 0);
return;
}
+ SwIspStats stats;
+
bench_.startFrame();
- startFrame(frame);
+ startFrame(frame, &stats, 1);
MappedFrameBuffer in(input, MappedFrameBuffer::MapFlag::Read);
if (!in.isValid()) {
@@ -533,8 +556,8 @@ void SwStatsCpu::processFrame(uint32_t frame, uint32_t bufferId, FrameBuffer *in
return;
}
- (this->*processFrame_)(in);
- finishFrame(frame, bufferId);
+ (this->*processFrame_)(in, &stats);
+ finishFrame(frame, bufferId, &stats, 1);
bench_.finishFrame();
}
Move the storage used to accumulate the RGB sums and the Y histogram out of the SwStatsCpu class and into the callers. The idea is to allow a single SwStatsCpu object to be shared between multiple threads each processing part of the image, with finishFrame() accumulating the per thread data into the final stats for the entire frame. This is a preparation patch for making DebayerCpu support multi-threading and this could also be used to make processFrame() multi-threaded. Benchmarking with the GPU-ISP which does separate swstats benchmarking, on the Uno-Q which has a weak CPU which is good for performance testing, shows 20-21ms to generate stats for a 3272x2464 frame both before and after this change. Signed-off-by: Hans de Goede <johannes.goede@oss.qualcomm.com> --- .../internal/software_isp/swstats_cpu.h | 29 ++++----- src/libcamera/software_isp/debayer_cpu.cpp | 12 ++-- src/libcamera/software_isp/debayer_cpu.h | 1 + src/libcamera/software_isp/swstats_cpu.cpp | 65 +++++++++++++------ 4 files changed, 65 insertions(+), 42 deletions(-)