[v8,04/15] ipa: libipa: Provide fixed point quantized traits
diff mbox series

Message ID 20260219-kbingham-quantizers-v8-4-2b6ff68ead26@ideasonboard.com
State New
Headers show
Series
  • libipa: Introduce a Quantized type
Related show

Commit Message

Kieran Bingham Feb. 19, 2026, 3:05 p.m. UTC
Extend the new Quantized type infrastructure by providing a
FixedPointQTraits template.

This allows construction of fixed point types with a Quantized storage
that allows easy reading of both the underlying quantized type value and
a floating point representation of that same value.

Reviewed-by: Isaac Scott <isaac.scott@ideasonboard.com>
Reviewed-by: Paul Elder <paul.elder@ideasonboard.com>
Reviewed-by: Stefan Klug <stefan.klug@ideasonboard.com>
Signed-off-by: Kieran Bingham <kieran.bingham@ideasonboard.com>
---
 src/ipa/libipa/fixedpoint.cpp | 98 +++++++++++++++++++++++++++++++++++++++++++
 src/ipa/libipa/fixedpoint.h   | 74 ++++++++++++++++++++++++++++++++
 2 files changed, 172 insertions(+)

Patch
diff mbox series

diff --git a/src/ipa/libipa/fixedpoint.cpp b/src/ipa/libipa/fixedpoint.cpp
index 6b698fc5d680..b581ca8453ef 100644
--- a/src/ipa/libipa/fixedpoint.cpp
+++ b/src/ipa/libipa/fixedpoint.cpp
@@ -37,6 +37,104 @@  namespace ipa {
  * \return The converted value
  */
 
+/**
+ * \struct libcamera::ipa::FixedPointQTraits
+ * \brief Traits type implementing fixed-point quantization conversions
+ *
+ * The FixedPointQTraits structure defines a policy for mapping floating-point
+ * values to and from fixed-point 2's complement integer representations. It is
+ * parameterised by the number of integer bits \a I, fractional bits \a F, and
+ * the integral storage type \a T. The traits are used with Quantized<Traits> to
+ * create a quantized type that stores both the fixed-point representation and
+ * the corresponding floating-point value.
+ *
+ * The signedness of the type is determined by the signedness of \a T. For
+ * signed types, the number of integer bits in \a I includes the sign bit.
+ *
+ * The quantized value is stored in the unsigned counterpart of \a T, which
+ * must be wide enough to hold the total number of bits \a (I + F). The storage
+ * type is always an unsigned integer to guarantee against sign extension when
+ * storing quantized values in registers.
+ *
+ * The trait exposes compile-time constants describing the bit layout, limits,
+ * and scaling factors used in the fixed-point representation.
+ *
+ * \tparam I Number of integer bits
+ * \tparam F Number of fractional bits
+ * \tparam T Integral type selecting the signedness and width of the storage
+ */
+
+/**
+ * \typedef FixedPointQTraits::QuantizedType
+ * \brief The unsigned integral type storing the fixed-point representation
+ */
+
+/**
+ * \var FixedPointQTraits::qMin
+ * \brief Minimum representable quantized integer value
+ *
+ * This corresponds to the most negative value for signed formats or zero for
+ * unsigned formats.
+ */
+
+/**
+ * \var FixedPointQTraits::qMax
+ * \brief Maximum representable quantized integer value
+ */
+
+/**
+ * \var FixedPointQTraits::min
+ * \brief Minimum representable floating-point value corresponding to qMin
+ */
+
+/**
+ * \var FixedPointQTraits::max
+ * \brief Maximum representable floating-point value corresponding to qMax
+ */
+
+/**
+ * \fn FixedPointQTraits::fromFloat(float v)
+ * \brief Convert a floating-point value to a fixed-point integer
+ * \param[in] v The floating-point value to be converted
+ * \return The quantized fixed-point integer representation
+ *
+ * The conversion first clamps the floating-point input \a v to the range [min,
+ * max] and then rounds it to the nearest fixed-point value according to the
+ * scaling factor defined by the number of fractional bits F.
+ */
+
+/**
+ * \fn FixedPointQTraits::toFloat(QuantizedType q)
+ * \brief Convert a fixed-point integer to a floating-point value
+ * \param[in] q The fixed-point integer value to be converted
+ * \return The corresponding floating-point value
+ *
+ * The conversion sign-extends the integer value if required and divides by the
+ * scaling factor defined by the number of fractional bits F.
+ */
+
+/**
+ * \typedef Q
+ * \brief Define a signed fixed-point quantized type with automatic storage width
+ * \tparam I The number of integer bits
+ * \tparam F The number of fractional bits
+ *
+ * This alias defines a signed fixed-point quantized type using the
+ * \ref FixedPointQTraits trait and a suitable signed integer type
+ * automatically selected based on the total number of bits \a (I + F).
+ */
+
+/**
+ * \typedef UQ
+ * \brief Define an unsigned fixed-point quantized type with automatic storage width
+ * \tparam I The number of integer bits
+ * \tparam F The number of fractional bits
+ *
+ * This alias defines an unsigned fixed-point quantized type using the
+ * \ref FixedPointQTraits trait and a suitable unsigned integer storage type
+ * automatically selected based on the total number of bits \a (I + F).
+ */
+
 } /* namespace ipa */
 
 } /* namespace libcamera */
diff --git a/src/ipa/libipa/fixedpoint.h b/src/ipa/libipa/fixedpoint.h
index aeb9bce3269b..df685e852bff 100644
--- a/src/ipa/libipa/fixedpoint.h
+++ b/src/ipa/libipa/fixedpoint.h
@@ -10,6 +10,8 @@ 
 #include <cmath>
 #include <type_traits>
 
+#include "quantized.h"
+
 namespace libcamera {
 
 namespace ipa {
@@ -63,6 +65,78 @@  constexpr R fixedToFloatingPoint(T number)
 	return static_cast<R>(t) / static_cast<R>(1 << F);
 }
 
+template<unsigned int I, unsigned int F, typename T>
+struct FixedPointQTraits { /* Traits for Quantized<>: I integer bits, F fractional bits, signedness from T */
+private:
+	static_assert(std::is_integral_v<T>, "FixedPointQTraits: T must be integral");
+	using UT = std::make_unsigned_t<T>; /* storage type: unsigned counterpart of T */
+
+	static constexpr unsigned int bits = I + F; /* total width of the fixed-point value */
+	static_assert(bits <= sizeof(UT) * 8, "FixedPointQTraits: too many bits for type UT");
+
+	/*
+	 * If fixed point storage is required with more than 24 bits, consider
+	 * updating this implementation to use double-precision floating point.
+	 */
+	static_assert(bits <= 24, "Floating point precision may be insufficient for more than 24 bits");
+
+	static constexpr UT bitMask = bits < sizeof(UT) * 8 /* mask of the low 'bits' bits */
+				    ? (UT{ 1 } << bits) - 1
+				    : ~UT{ 0 }; /* value fills UT: no shift by the full type width */
+
+public:
+	using QuantizedType = UT; /* always unsigned, also when T is signed */
+
+	static constexpr UT qMin = std::is_signed_v<T>
+				 ? -(UT{ 1 } << (bits - 1)) /* signed: -2^(bits - 1), wrapped into UT */
+				 : 0; /* unsigned: zero */
+
+	static constexpr UT qMax = std::is_signed_v<T>
+				 ? (UT{ 1 } << (bits - 1)) - 1 /* signed: 2^(bits - 1) - 1 */
+				 : bitMask; /* unsigned: all 'bits' bits set */
+
+	static constexpr float toFloat(QuantizedType q)
+	{
+		return fixedToFloatingPoint<I, F, float, T>(q); /* q is converted from UT to T at the call */
+	}
+
+	static constexpr float min = fixedToFloatingPoint<I, F, float>(static_cast<T>(qMin)); /* float value of qMin */
+	static constexpr float max = fixedToFloatingPoint<I, F, float>(static_cast<T>(qMax)); /* float value of qMax */
+
+	static_assert(min < max, "FixedPointQTraits: Minimum must be less than maximum");
+
+	/* Conversion functions required by Quantized<Traits> */
+	static QuantizedType fromFloat(float v)
+	{
+		v = std::clamp(v, min, max); /* saturate out-of-range input; NOTE(review): std::clamp needs <algorithm>, confirm it is included */
+		return floatingToFixedPoint<I, F, T, float>(v);
+	}
+};
+
+namespace details {
+
+template<unsigned int Bits>
+constexpr auto qtype() /* evaluated through decltype() by the Q and UQ aliases: only the return type matters */
+{
+	static_assert(Bits <= 32, /* FixedPointQTraits separately limits the total width to 24 bits */
+		      "Unsupported number of bits for quantized type");
+
+	if constexpr (Bits <= 8) /* pick the narrowest signed type that holds Bits bits */
+		return int8_t();
+	else if constexpr (Bits <= 16)
+		return int16_t();
+	else if constexpr (Bits <= 32)
+		return int32_t();
+}
+
+} /* namespace details */
+
+template<unsigned int I, unsigned int F>
+using Q = Quantized<FixedPointQTraits<I, F, decltype(details::qtype<I + F>())>>; /* signed: T is the type chosen by details::qtype for I + F bits */
+
+template<unsigned int I, unsigned int F>
+using UQ = Quantized<FixedPointQTraits<I, F, std::make_unsigned_t<decltype(details::qtype<I + F>())>>>; /* unsigned: unsigned counterpart of the same choice */
+
 } /* namespace ipa */
 
 } /* namespace libcamera */