| Message ID | 20260213-kbingham-quantizers-v7-4-1626b9aaabf1@ideasonboard.com |
|---|---|
| State | Accepted |
| Headers | show |
| Series |
|
| Related | show |
Quoting Kieran Bingham (2026-02-14 01:57:43) > Extend the new Quantized type infrastructure by providing a > FixedPointQTraits template. > > This allows construction of fixed point types with a Quantized storage > that allows easy reading of both the underlying quantized type value and > a floating point representation of that same value. > > Reviewed-by: Isaac Scott <isaac.scott@ideasonboard.com> > Signed-off-by: Kieran Bingham <kieran.bingham@ideasonboard.com> Reviewed-by: Paul Elder <paul.elder@ideasonboard.com> > > --- > v4: > - Assert that the given type has enough bits for the usage > - Use unsigned types for calculating qmin/qmax > - Reorder toFloat/fromFloat and min/max for future inlining > - Make toFloat and fromFloat constexpr > > v5: > - Make UT, Bits and Bitmask private (and remove doxygen) > - Remove constexpr from fromFloat which uses std::round (only constexpr > in C++23) > - static_assert that min<max when converted > - Provide new Q and UQ automatic width types (Thanks Barnabás) > - Convert types to shortened Q/UQ automatic widths > - Use automatic width Q/UQ for 12,4 > - change qmin->qMin qmax->qMax Bits->bits BitMask->bitMask > - Remove typedefs for Q1_7 etc > > v6: > - Use 'quantized' over 'quantised' > - Document sign is based on T and number of bits includes sign bit > > - Document that fromFloat also clamps between [min, max] > > - Remove 64 bit support. We have 32 bit assumptions on fromFloat > > - Restrict to 24 bits, to stay compatible with float types > > v7: > - Use unsigned storage and ensure we don't have sign extension issues. 
> --- > src/ipa/libipa/fixedpoint.cpp | 98 +++++++++++++++++++++++++++++++++++++++++++ > src/ipa/libipa/fixedpoint.h | 74 ++++++++++++++++++++++++++++++++ > 2 files changed, 172 insertions(+) > > diff --git a/src/ipa/libipa/fixedpoint.cpp b/src/ipa/libipa/fixedpoint.cpp > index 6b698fc5d680..caa9ce0fc1ec 100644 > --- a/src/ipa/libipa/fixedpoint.cpp > +++ b/src/ipa/libipa/fixedpoint.cpp > @@ -37,6 +37,104 @@ namespace ipa { > * \return The converted value > */ > > +/** > + * \struct libcamera::ipa::FixedPointQTraits > + * \brief Traits type implementing fixed-point quantisation conversions > + * > + * The FixedPointQTraits structure defines a policy for mapping floating-point > + * values to and from fixed-point integer representations. It is parameterised > + * by the number of integer bits \a I, fractional bits \a F, and the integral > + * storage type \a T. The traits are used with Quantized<Traits> to create a > + * quantized type that stores both the fixed-point representation and the > + * corresponding floating-point value. > + * > + * The signedness of the type is determined by the signedness of \a T. For > + * signed types, the number of integer bits in \a I includes the sign bit. > + * > + * Storage is determined by the total number of bits \a (I + F) and is > + * automatically selected, but the internal storage type is always an unsigned > + * integer to guarantee against sign extension when storing quantized values > + * in registers. > + * > + * The trait exposes compile-time constants describing the bit layout, limits, > + * and scaling factors used in the fixed-point representation. 
> + * > + * \tparam I Number of integer bits > + * \tparam F Number of fractional bits > + * \tparam T Integral type used to store the quantized value > + */ > + > +/** > + * \typedef FixedPointQTraits::QuantizedType > + * \brief The integral storage type used for the fixed-point representation > + */ > + > +/** > + * \var FixedPointQTraits::qMin > + * \brief Minimum representable quantized integer value > + * > + * This corresponds to the most negative value for signed formats or zero for > + * unsigned formats. > + */ > + > +/** > + * \var FixedPointQTraits::qMax > + * \brief Maximum representable quantized integer value > + */ > + > +/** > + * \var FixedPointQTraits::min > + * \brief Minimum representable floating-point value corresponding to qMin > + */ > + > +/** > + * \var FixedPointQTraits::max > + * \brief Maximum representable floating-point value corresponding to qMax > + */ > + > +/** > + * \fn FixedPointQTraits::fromFloat(float v) > + * \brief Convert a floating-point value to a fixed-point integer > + * \param[in] v The floating-point value to be converted > + * \return The quantized fixed-point integer representation > + * > + * The conversion first clamps the floating-point input \a v to the range [min, > + * max] and then rounds it to the nearest integer according to the scaling > + * factor defined by the number of fractional bits F. > + */ > + > +/** > + * \fn FixedPointQTraits::toFloat(QuantizedType q) > + * \brief Convert a fixed-point integer to a floating-point value > + * \param[in] q The fixed-point integer value to be converted > + * \return The corresponding floating-point value > + * > + * The conversion sign-extends the integer value if required and divides by the > + * scaling factor defined by the number of fractional bits F. 
> + */ > + > +/** > + * \typedef Q > + * \brief Define a signed fixed-point quantized type with automatic storage width > + * \tparam I The number of integer bits > + * \tparam F The number of fractional bits > + * > + * This alias defines a signed fixed-point quantized type using the > + * \ref FixedPointQTraits trait and a suitable signed integer storage type > + * automatically selected based on the total number of bits \a (I + F). > + */ > + > +/** > + * \typedef UQ > + * \brief Define an unsigned fixed-point quantized type with automatic storage width > + * \tparam I The number of integer bits > + * \tparam F The number of fractional bits > + * > + * This alias defines an unsigned fixed-point quantized type using the > + * \ref FixedPointQTraits trait and a suitable unsigned integer storage type > + * automatically selected based on the total number of bits \a (I + F). > + */ > + > } /* namespace ipa */ > > } /* namespace libcamera */ > diff --git a/src/ipa/libipa/fixedpoint.h b/src/ipa/libipa/fixedpoint.h > index aeb9bce3269b..b6b611df7fc3 100644 > --- a/src/ipa/libipa/fixedpoint.h > +++ b/src/ipa/libipa/fixedpoint.h > @@ -10,6 +10,8 @@ > #include <cmath> > #include <type_traits> > > +#include "quantized.h" > + > namespace libcamera { > > namespace ipa { > @@ -63,6 +65,78 @@ constexpr R fixedToFloatingPoint(T number) > return static_cast<R>(t) / static_cast<R>(1 << F); > } > > +template<unsigned int I, unsigned int F, typename T> > +struct FixedPointQTraits { > +private: > + static_assert(std::is_integral_v<T>, "FixedPointQTraits: T must be integral"); > + using UT = std::make_unsigned_t<T>; > + > + static constexpr unsigned int bits = I + F; > + static_assert(bits <= sizeof(UT) * 8, "FixedPointQTraits: too many bits for type UT"); > + > + /* > + * If fixed point storage is required with more than 24 bits, consider > + * updating this implementation to use double-precision floating point. 
> + */ > + static_assert(bits <= 24, "Floating point precision may be insufficient for more than 24 bits"); > + > + static constexpr UT bitMask = bits < sizeof(UT) * 8 > + ? (UT{ 1 } << bits) - 1 > + : ~UT{ 0 }; > + > +public: > + using QuantizedType = UT; > + > + static constexpr UT qMin = std::is_signed_v<T> > + ? -(UT{ 1 } << (bits - 1)) > + : 0; > + > + static constexpr UT qMax = std::is_signed_v<T> > + ? (UT{ 1 } << (bits - 1)) - 1 > + : bitMask; > + > + static constexpr float toFloat(QuantizedType q) > + { > + return fixedToFloatingPoint<I, F, float, T>(q); > + } > + > + static constexpr float min = fixedToFloatingPoint<I, F, float>(static_cast<T>(qMin)); > + static constexpr float max = fixedToFloatingPoint<I, F, float>(static_cast<T>(qMax)); > + > + static_assert(min < max, "FixedPointQTraits: Minimum must be less than maximum"); > + > + /* Conversion functions required by Quantized<Traits> */ > + static QuantizedType fromFloat(float v) > + { > + v = std::clamp(v, min, max); > + return floatingToFixedPoint<I, F, T, float>(v); > + } > +}; > + > +namespace details { > + > +template<unsigned int Bits> > +constexpr auto qtype() > +{ > + static_assert(Bits <= 32, > + "Unsupported number of bits for quantized type"); > + > + if constexpr (Bits <= 8) > + return int8_t(); > + else if constexpr (Bits <= 16) > + return int16_t(); > + else if constexpr (Bits <= 32) > + return int32_t(); > +} > + > +} /* namespace details */ > + > +template<unsigned int I, unsigned int F> > +using Q = Quantized<FixedPointQTraits<I, F, decltype(details::qtype<I + F>())>>; > + > +template<unsigned int I, unsigned int F> > +using UQ = Quantized<FixedPointQTraits<I, F, std::make_unsigned_t<decltype(details::qtype<I + F>())>>>; > + > } /* namespace ipa */ > > } /* namespace libcamera */ > > -- > 2.52.0 >
Quoting Kieran Bingham (2026-02-13 17:57:43) > Extend the new Quantized type infrastructure by providing a > FixedPointQTraits template. > > This allows construction of fixed point types with a Quantized storage > that allows easy reading of both the underlying quantized type value and > a floating point representation of that same value. > > Reviewed-by: Isaac Scott <isaac.scott@ideasonboard.com> > Signed-off-by: Kieran Bingham <kieran.bingham@ideasonboard.com> Now with the internals set to unsigned there is even more reason to Reviewed-by: Stefan Klug <stefan.klug@ideasonboard.com> Cheers, Stefan > > --- > v4: > - Assert that the given type has enough bits for the usage > - Use unsigned types for calculating qmin/qmax > - Reorder toFloat/fromFloat and min/max for future inlining > - Make toFloat and fromFloat constexpr > > v5: > - Make UT, Bits and Bitmask private (and remove doxygen) > - Remove constexpr from fromFloat which uses std::round (only constexpr > in C++23) > - static_assert that min<max when converted > - Provide new Q and UQ automatic width types (Thanks Barnabás) > - Convert types to shortened Q/UQ automatic widths > - Use automatic width Q/UQ for 12,4 > - change qmin->qMin qmax->qMax Bits->bits BitMask->bitMask > - Remove typedefs for Q1_7 etc > > v6: > - Use 'quantized' over 'quantised' > - Document sign is based on T and number of bits includes sign bit > > - Document that fromFloat also clamps between [min, max] > > - Remove 64 bit support. We have 32 bit assumptions on fromFloat > > - Restrict to 24 bits, to stay compatible with float types > > v7: > - Use unsigned storage and ensure we don't have sign extension issues. 
> --- > src/ipa/libipa/fixedpoint.cpp | 98 +++++++++++++++++++++++++++++++++++++++++++ > src/ipa/libipa/fixedpoint.h | 74 ++++++++++++++++++++++++++++++++ > 2 files changed, 172 insertions(+) > > diff --git a/src/ipa/libipa/fixedpoint.cpp b/src/ipa/libipa/fixedpoint.cpp > index 6b698fc5d680..caa9ce0fc1ec 100644 > --- a/src/ipa/libipa/fixedpoint.cpp > +++ b/src/ipa/libipa/fixedpoint.cpp > @@ -37,6 +37,104 @@ namespace ipa { > * \return The converted value > */ > > +/** > + * \struct libcamera::ipa::FixedPointQTraits > + * \brief Traits type implementing fixed-point quantisation conversions > + * > + * The FixedPointQTraits structure defines a policy for mapping floating-point > + * values to and from fixed-point integer representations. It is parameterised > + * by the number of integer bits \a I, fractional bits \a F, and the integral > + * storage type \a T. The traits are used with Quantized<Traits> to create a > + * quantized type that stores both the fixed-point representation and the > + * corresponding floating-point value. > + * > + * The signedness of the type is determined by the signedness of \a T. For > + * signed types, the number of integer bits in \a I includes the sign bit. > + * > + * Storage is determined by the total number of bits \a (I + F) and is > + * automatically selected, but the internal storage type is always an unsigned > + * integer to guarantee against sign extension when storing quantized values > + * in registers. > + * > + * The trait exposes compile-time constants describing the bit layout, limits, > + * and scaling factors used in the fixed-point representation. 
> + * > + * \tparam I Number of integer bits > + * \tparam F Number of fractional bits > + * \tparam T Integral type used to store the quantized value > + */ > + > +/** > + * \typedef FixedPointQTraits::QuantizedType > + * \brief The integral storage type used for the fixed-point representation > + */ > + > +/** > + * \var FixedPointQTraits::qMin > + * \brief Minimum representable quantized integer value > + * > + * This corresponds to the most negative value for signed formats or zero for > + * unsigned formats. > + */ > + > +/** > + * \var FixedPointQTraits::qMax > + * \brief Maximum representable quantized integer value > + */ > + > +/** > + * \var FixedPointQTraits::min > + * \brief Minimum representable floating-point value corresponding to qMin > + */ > + > +/** > + * \var FixedPointQTraits::max > + * \brief Maximum representable floating-point value corresponding to qMax > + */ > + > +/** > + * \fn FixedPointQTraits::fromFloat(float v) > + * \brief Convert a floating-point value to a fixed-point integer > + * \param[in] v The floating-point value to be converted > + * \return The quantized fixed-point integer representation > + * > + * The conversion first clamps the floating-point input \a v to the range [min, > + * max] and then rounds it to the nearest integer according to the scaling > + * factor defined by the number of fractional bits F. > + */ > + > +/** > + * \fn FixedPointQTraits::toFloat(QuantizedType q) > + * \brief Convert a fixed-point integer to a floating-point value > + * \param[in] q The fixed-point integer value to be converted > + * \return The corresponding floating-point value > + * > + * The conversion sign-extends the integer value if required and divides by the > + * scaling factor defined by the number of fractional bits F. 
> + */ > + > +/** > + * \typedef Q > + * \brief Define a signed fixed-point quantized type with automatic storage width > + * \tparam I The number of integer bits > + * \tparam F The number of fractional bits > + * > + * This alias defines a signed fixed-point quantized type using the > + * \ref FixedPointQTraits trait and a suitable signed integer storage type > + * automatically selected based on the total number of bits \a (I + F). > + */ > + > +/** > + * \typedef UQ > + * \brief Define an unsigned fixed-point quantized type with automatic storage width > + * \tparam I The number of integer bits > + * \tparam F The number of fractional bits > + * > + * This alias defines an unsigned fixed-point quantized type using the > + * \ref FixedPointQTraits trait and a suitable unsigned integer storage type > + * automatically selected based on the total number of bits \a (I + F). > + */ > + > } /* namespace ipa */ > > } /* namespace libcamera */ > diff --git a/src/ipa/libipa/fixedpoint.h b/src/ipa/libipa/fixedpoint.h > index aeb9bce3269b..b6b611df7fc3 100644 > --- a/src/ipa/libipa/fixedpoint.h > +++ b/src/ipa/libipa/fixedpoint.h > @@ -10,6 +10,8 @@ > #include <cmath> > #include <type_traits> > > +#include "quantized.h" > + > namespace libcamera { > > namespace ipa { > @@ -63,6 +65,78 @@ constexpr R fixedToFloatingPoint(T number) > return static_cast<R>(t) / static_cast<R>(1 << F); > } > > +template<unsigned int I, unsigned int F, typename T> > +struct FixedPointQTraits { > +private: > + static_assert(std::is_integral_v<T>, "FixedPointQTraits: T must be integral"); > + using UT = std::make_unsigned_t<T>; > + > + static constexpr unsigned int bits = I + F; > + static_assert(bits <= sizeof(UT) * 8, "FixedPointQTraits: too many bits for type UT"); > + > + /* > + * If fixed point storage is required with more than 24 bits, consider > + * updating this implementation to use double-precision floating point. 
> + */ > + static_assert(bits <= 24, "Floating point precision may be insufficient for more than 24 bits"); > + > + static constexpr UT bitMask = bits < sizeof(UT) * 8 > + ? (UT{ 1 } << bits) - 1 > + : ~UT{ 0 }; > + > +public: > + using QuantizedType = UT; > + > + static constexpr UT qMin = std::is_signed_v<T> > + ? -(UT{ 1 } << (bits - 1)) > + : 0; > + > + static constexpr UT qMax = std::is_signed_v<T> > + ? (UT{ 1 } << (bits - 1)) - 1 > + : bitMask; > + > + static constexpr float toFloat(QuantizedType q) > + { > + return fixedToFloatingPoint<I, F, float, T>(q); > + } > + > + static constexpr float min = fixedToFloatingPoint<I, F, float>(static_cast<T>(qMin)); > + static constexpr float max = fixedToFloatingPoint<I, F, float>(static_cast<T>(qMax)); > + > + static_assert(min < max, "FixedPointQTraits: Minimum must be less than maximum"); > + > + /* Conversion functions required by Quantized<Traits> */ > + static QuantizedType fromFloat(float v) > + { > + v = std::clamp(v, min, max); > + return floatingToFixedPoint<I, F, T, float>(v); > + } > +}; > + > +namespace details { > + > +template<unsigned int Bits> > +constexpr auto qtype() > +{ > + static_assert(Bits <= 32, > + "Unsupported number of bits for quantized type"); > + > + if constexpr (Bits <= 8) > + return int8_t(); > + else if constexpr (Bits <= 16) > + return int16_t(); > + else if constexpr (Bits <= 32) > + return int32_t(); > +} > + > +} /* namespace details */ > + > +template<unsigned int I, unsigned int F> > +using Q = Quantized<FixedPointQTraits<I, F, decltype(details::qtype<I + F>())>>; > + > +template<unsigned int I, unsigned int F> > +using UQ = Quantized<FixedPointQTraits<I, F, std::make_unsigned_t<decltype(details::qtype<I + F>())>>>; > + > } /* namespace ipa */ > > } /* namespace libcamera */ > > -- > 2.52.0 >
On Fri, Feb 13, 2026 at 04:57:43PM +0000, Kieran Bingham wrote: > Extend the new Quantized type infrastructure by providing a > FixedPointQTraits template. > > This allows construction of fixed point types with a Quantized storage > that allows easy reading of both the underlying quantized type value and > a floating point representation of that same value. > > Reviewed-by: Isaac Scott <isaac.scott@ideasonboard.com> > Signed-off-by: Kieran Bingham <kieran.bingham@ideasonboard.com> > > --- > v4: > - Assert that the given type has enough bits for the usage > - Use unsigned types for calculating qmin/qmax > - Reorder toFloat/fromFloat and min/max for future inlining > - Make toFloat and fromFloat constexpr > > v5: > - Make UT, Bits and Bitmask private (and remove doxygen) > - Remove constexpr from fromFloat which uses std::round (only constexpr > in C++23) > - static_assert that min<max when converted > - Provide new Q and UQ automatic width types (Thanks Barnabás) > - Convert types to shortened Q/UQ automatic widths > - Use automatic width Q/UQ for 12,4 > - change qmin->qMin qmax->qMax Bits->bits BitMask->bitMask > - Remove typedefs for Q1_7 etc > > v6: > - Use 'quantized' over 'quantised' > - Document sign is based on T and number of bits includes sign bit > > - Document that fromFloat also clamps between [min, max] > > - Remove 64 bit support. We have 32 bit assumptions on fromFloat > > - Restrict to 24 bits, to stay compatible with float types > > v7: > - Use unsigned storage and ensure we don't have sign extension issues. 
> --- > src/ipa/libipa/fixedpoint.cpp | 98 +++++++++++++++++++++++++++++++++++++++++++ > src/ipa/libipa/fixedpoint.h | 74 ++++++++++++++++++++++++++++++++ > 2 files changed, 172 insertions(+) > > diff --git a/src/ipa/libipa/fixedpoint.cpp b/src/ipa/libipa/fixedpoint.cpp > index 6b698fc5d680..caa9ce0fc1ec 100644 > --- a/src/ipa/libipa/fixedpoint.cpp > +++ b/src/ipa/libipa/fixedpoint.cpp > @@ -37,6 +37,104 @@ namespace ipa { > * \return The converted value > */ > > +/** > + * \struct libcamera::ipa::FixedPointQTraits > + * \brief Traits type implementing fixed-point quantisation conversions s/quantisation/quantization/ (for consistency, not because it's the right flavour of English :-)) > + * > + * The FixedPointQTraits structure defines a policy for mapping floating-point > + * values to and from fixed-point integer representations. It is parameterised s/fixed-point/fixed-point 2's complement/ (just noticed we never mention this anywhere) > + * by the number of integer bits \a I, fractional bits \a F, and the integral > + * storage type \a T. The traits are used with Quantized<Traits> to create a > + * quantized type that stores both the fixed-point representation and the > + * corresponding floating-point value. > + * > + * The signedness of the type is determined by the signedness of \a T. For > + * signed types, the number of integer bits in \a I includes the sign bit. > + * > + * Storage is determined by the total number of bits \a (I + F) and is > + * automatically selected, but the internal storage type is always an unsigned > + * integer to guarantee against sign extension when storing quantized values > + * in registers. > + * > + * The trait exposes compile-time constants describing the bit layout, limits, > + * and scaling factors used in the fixed-point representation. 
> + * > + * \tparam I Number of integer bits > + * \tparam F Number of fractional bits > + * \tparam T Integral type used to store the quantized value > + */ > + > +/** > + * \typedef FixedPointQTraits::QuantizedType > + * \brief The integral storage type used for the fixed-point representation > + */ > + > +/** > + * \var FixedPointQTraits::qMin > + * \brief Minimum representable quantized integer value > + * > + * This corresponds to the most negative value for signed formats or zero for > + * unsigned formats. > + */ > + > +/** > + * \var FixedPointQTraits::qMax > + * \brief Maximum representable quantized integer value > + */ > + > +/** > + * \var FixedPointQTraits::min > + * \brief Minimum representable floating-point value corresponding to qMin > + */ > + > +/** > + * \var FixedPointQTraits::max > + * \brief Maximum representable floating-point value corresponding to qMax > + */ > + > +/** > + * \fn FixedPointQTraits::fromFloat(float v) > + * \brief Convert a floating-point value to a fixed-point integer > + * \param[in] v The floating-point value to be converted > + * \return The quantized fixed-point integer representation > + * > + * The conversion first clamps the floating-point input \a v to the range [min, > + * max] and then rounds it to the nearest integer according to the scaling s/integer/fixed-point value/ maybe ? Rounding to the nearest integer sounds like it rounds to an integer value. > + * factor defined by the number of fractional bits F. > + */ > + > +/** > + * \fn FixedPointQTraits::toFloat(QuantizedType q) > + * \brief Convert a fixed-point integer to a floating-point value > + * \param[in] q The fixed-point integer value to be converted > + * \return The corresponding floating-point value > + * > + * The conversion sign-extends the integer value if required and divides by the > + * scaling factor defined by the number of fractional bits F. 
> + */ > + > +/** > + * \typedef Q > + * \brief Define a signed fixed-point quantized type with automatic storage width > + * \tparam I The number of integer bits > + * \tparam F The number of fractional bits > + * > + * This alias defines a signed fixed-point quantized type using the > + * \ref FixedPointQTraits trait and a suitable signed integer storage type > + * automatically selected based on the total number of bits \a (I + F). > + */ > + > +/** > + * \typedef UQ > + * \brief Define an unsigned fixed-point quantized type with automatic storage width > + * \tparam I The number of integer bits > + * \tparam F The number of fractional bits > + * > + * This alias defines an unsigned fixed-point quantized type using the > + * \ref FixedPointQTraits trait and a suitable unsigned integer storage type > + * automatically selected based on the total number of bits \a (I + F). > + */ > + > } /* namespace ipa */ > > } /* namespace libcamera */ > diff --git a/src/ipa/libipa/fixedpoint.h b/src/ipa/libipa/fixedpoint.h > index aeb9bce3269b..b6b611df7fc3 100644 > --- a/src/ipa/libipa/fixedpoint.h > +++ b/src/ipa/libipa/fixedpoint.h > @@ -10,6 +10,8 @@ > #include <cmath> > #include <type_traits> > > +#include "quantized.h" > + > namespace libcamera { > > namespace ipa { > @@ -63,6 +65,78 @@ constexpr R fixedToFloatingPoint(T number) > return static_cast<R>(t) / static_cast<R>(1 << F); > } > > +template<unsigned int I, unsigned int F, typename T> > +struct FixedPointQTraits { > +private: > + static_assert(std::is_integral_v<T>, "FixedPointQTraits: T must be integral"); > + using UT = std::make_unsigned_t<T>; > + > + static constexpr unsigned int bits = I + F; > + static_assert(bits <= sizeof(UT) * 8, "FixedPointQTraits: too many bits for type UT"); > + > + /* > + * If fixed point storage is required with more than 24 bits, consider > + * updating this implementation to use double-precision floating point. 
> + */ > + static_assert(bits <= 24, "Floating point precision may be insufficient for more than 24 bits"); > + > + static constexpr UT bitMask = bits < sizeof(UT) * 8 > + ? (UT{ 1 } << bits) - 1 > + : ~UT{ 0 }; Weird indentation. static constexpr UT bitMask = bits < sizeof(UT) * 8 ? (UT{ 1 } << bits) - 1 : ~UT{ 0 }; Same below. > + > +public: > + using QuantizedType = UT; > + > + static constexpr UT qMin = std::is_signed_v<T> > + ? -(UT{ 1 } << (bits - 1)) > + : 0; > + > + static constexpr UT qMax = std::is_signed_v<T> > + ? (UT{ 1 } << (bits - 1)) - 1 > + : bitMask; > + > + static constexpr float toFloat(QuantizedType q) > + { > + return fixedToFloatingPoint<I, F, float, T>(q); > + } > + > + static constexpr float min = fixedToFloatingPoint<I, F, float>(static_cast<T>(qMin)); > + static constexpr float max = fixedToFloatingPoint<I, F, float>(static_cast<T>(qMax)); > + > + static_assert(min < max, "FixedPointQTraits: Minimum must be less than maximum"); > + > + /* Conversion functions required by Quantized<Traits> */ > + static QuantizedType fromFloat(float v) You've dropped the constexpr here, was that intentional ? 
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com> > + { > + v = std::clamp(v, min, max); > + return floatingToFixedPoint<I, F, T, float>(v); > + } > +}; > + > +namespace details { > + > +template<unsigned int Bits> > +constexpr auto qtype() > +{ > + static_assert(Bits <= 32, > + "Unsupported number of bits for quantized type"); > + > + if constexpr (Bits <= 8) > + return int8_t(); > + else if constexpr (Bits <= 16) > + return int16_t(); > + else if constexpr (Bits <= 32) > + return int32_t(); > +} > + > +} /* namespace details */ > + > +template<unsigned int I, unsigned int F> > +using Q = Quantized<FixedPointQTraits<I, F, decltype(details::qtype<I + F>())>>; > + > +template<unsigned int I, unsigned int F> > +using UQ = Quantized<FixedPointQTraits<I, F, std::make_unsigned_t<decltype(details::qtype<I + F>())>>>; > + > } /* namespace ipa */ > > } /* namespace libcamera */
Quoting Laurent Pinchart (2026-02-19 12:47:58) > On Fri, Feb 13, 2026 at 04:57:43PM +0000, Kieran Bingham wrote: > > Extend the new Quantized type infrastructure by providing a > > FixedPointQTraits template. > > > > This allows construction of fixed point types with a Quantized storage > > that allows easy reading of both the underlying quantized type value and > > a floating point representation of that same value. > > > > Reviewed-by: Isaac Scott <isaac.scott@ideasonboard.com> > > Signed-off-by: Kieran Bingham <kieran.bingham@ideasonboard.com> > > > > --- > > v4: > > - Assert that the given type has enough bits for the usage > > - Use unsigned types for calculating qmin/qmax > > - Reorder toFloat/fromFloat and min/max for future inlining > > - Make toFloat and fromFloat constexpr > > > > v5: > > - Make UT, Bits and Bitmask private (and remove doxygen) > > - Remove constexpr from fromFloat which uses std::round (only constexpr > > in C++23) > > - static_assert that min<max when converted > > - Provide new Q and UQ automatic width types (Thanks Barnabás) > > - Convert types to shortened Q/UQ automatic widths > > - Use automatic width Q/UQ for 12,4 > > - change qmin->qMin qmax->qMax Bits->bits BitMask->bitMask > > - Remove typedefs for Q1_7 etc > > > > v6: > > - Use 'quantized' over 'quantised' > > - Document sign is based on T and number of bits includes sign bit > > > > - Document that fromFloat also clamps between [min, max] > > > > - Remove 64 bit support. We have 32 bit assumptions on fromFloat > > > > - Restrict to 24 bits, to stay compatible with float types > > > > v7: > > - Use unsigned storage and ensure we don't have sign extension issues. 
> > --- > > src/ipa/libipa/fixedpoint.cpp | 98 +++++++++++++++++++++++++++++++++++++++++++ > > src/ipa/libipa/fixedpoint.h | 74 ++++++++++++++++++++++++++++++++ > > 2 files changed, 172 insertions(+) > > > > diff --git a/src/ipa/libipa/fixedpoint.cpp b/src/ipa/libipa/fixedpoint.cpp > > index 6b698fc5d680..caa9ce0fc1ec 100644 > > --- a/src/ipa/libipa/fixedpoint.cpp > > +++ b/src/ipa/libipa/fixedpoint.cpp > > @@ -37,6 +37,104 @@ namespace ipa { > > * \return The converted value > > */ > > > > +/** > > + * \struct libcamera::ipa::FixedPointQTraits > > + * \brief Traits type implementing fixed-point quantisation conversions > > s/quantisation/quantization/ (for consistency, not because it's the > right flavour of English :-)) I know - I hate it ... my fingers type english ... but somehow I ended up mostly using that other thing... I'm not changing everything now ;-) > > > + * > > + * The FixedPointQTraits structure defines a policy for mapping floating-point > > + * values to and from fixed-point integer representations. It is parameterised > > s/fixed-point/fixed-point 2's complement/ > > (just noticed we never mention this anywhere) Updated, > > > + * by the number of integer bits \a I, fractional bits \a F, and the integral > > + * storage type \a T. The traits are used with Quantized<Traits> to create a > > + * quantized type that stores both the fixed-point representation and the > > + * corresponding floating-point value. > > + * > > + * The signedness of the type is determined by the signedness of \a T. For > > + * signed types, the number of integer bits in \a I includes the sign bit. > > + * > > + * Storage is determined by the total number of bits \a (I + F) and is > > + * automatically selected, but the internal storage type is always an unsigned > > + * integer to guarantee against sign extension when storing quantized values > > + * in registers. 
> > + * > > + * The trait exposes compile-time constants describing the bit layout, limits, > > + * and scaling factors used in the fixed-point representation. > > + * > > + * \tparam I Number of integer bits > > + * \tparam F Number of fractional bits > > + * \tparam T Integral type used to store the quantized value > > + */ > > + > > +/** > > + * \typedef FixedPointQTraits::QuantizedType > > + * \brief The integral storage type used for the fixed-point representation > > + */ > > + > > +/** > > + * \var FixedPointQTraits::qMin > > + * \brief Minimum representable quantized integer value > > + * > > + * This corresponds to the most negative value for signed formats or zero for > > + * unsigned formats. > > + */ > > + > > +/** > > + * \var FixedPointQTraits::qMax > > + * \brief Maximum representable quantized integer value > > + */ > > + > > +/** > > + * \var FixedPointQTraits::min > > + * \brief Minimum representable floating-point value corresponding to qMin > > + */ > > + > > +/** > > + * \var FixedPointQTraits::max > > + * \brief Maximum representable floating-point value corresponding to qMax > > + */ > > + > > +/** > > + * \fn FixedPointQTraits::fromFloat(float v) > > + * \brief Convert a floating-point value to a fixed-point integer > > + * \param[in] v The floating-point value to be converted > > + * \return The quantized fixed-point integer representation > > + * > > + * The conversion first clamps the floating-point input \a v to the range [min, > > + * max] and then rounds it to the nearest integer according to the scaling > > s/integer/fixed-point value/ maybe ? Rounding to the nearest integer > sounds likes it rounds to an integer value. Ack, > > > + * factor defined by the number of fractional bits F. 
> > + */ > > + > > +/** > > + * \fn FixedPointQTraits::toFloat(QuantizedType q) > > + * \brief Convert a fixed-point integer to a floating-point value > > + * \param[in] q The fixed-point integer value to be converted > > + * \return The corresponding floating-point value > > + * > > + * The conversion sign-extends the integer value if required and divides by the > > + * scaling factor defined by the number of fractional bits F. > > + */ > > + > > +/** > > + * \typedef Q > > + * \brief Define a signed fixed-point quantized type with automatic storage width > > + * \tparam I The number of integer bits > > + * \tparam F The number of fractional bits > > + * > > + * This alias defines a signed fixed-point quantized type using the > > + * \ref FixedPointQTraits trait and a suitable signed integer storage type > > + * automatically selected based on the total number of bits \a (I + F). > > + */ > > + > > +/** > > + * \typedef UQ > > + * \brief Define an unsigned fixed-point quantized type with automatic storage width > > + * \tparam I The number of integer bits > > + * \tparam F The number of fractional bits > > + * > > + * This alias defines an unsigned fixed-point quantized type using the > > + * \ref FixedPointQTraits trait and a suitable unsigned integer storage type > > + * automatically selected based on the total number of bits \a (I + F). 
> > + */ > > + > > } /* namespace ipa */ > > > > } /* namespace libcamera */ > > diff --git a/src/ipa/libipa/fixedpoint.h b/src/ipa/libipa/fixedpoint.h > > index aeb9bce3269b..b6b611df7fc3 100644 > > --- a/src/ipa/libipa/fixedpoint.h > > +++ b/src/ipa/libipa/fixedpoint.h > > @@ -10,6 +10,8 @@ > > #include <cmath> > > #include <type_traits> > > > > +#include "quantized.h" > > + > > namespace libcamera { > > > > namespace ipa { > > @@ -63,6 +65,78 @@ constexpr R fixedToFloatingPoint(T number) > > return static_cast<R>(t) / static_cast<R>(1 << F); > > } > > > > +template<unsigned int I, unsigned int F, typename T> > > +struct FixedPointQTraits { > > +private: > > + static_assert(std::is_integral_v<T>, "FixedPointQTraits: T must be integral"); > > + using UT = std::make_unsigned_t<T>; > > + > > + static constexpr unsigned int bits = I + F; > > + static_assert(bits <= sizeof(UT) * 8, "FixedPointQTraits: too many bits for type UT"); > > + > > + /* > > + * If fixed point storage is required with more than 24 bits, consider > > + * updating this implementation to use double-precision floating point. > > + */ > > + static_assert(bits <= 24, "Floating point precision may be insufficient for more than 24 bits"); > > + > > + static constexpr UT bitMask = bits < sizeof(UT) * 8 > > + ? (UT{ 1 } << bits) - 1 > > + : ~UT{ 0 }; > > Weird indentation. > > static constexpr UT bitMask = bits < sizeof(UT) * 8 > ? (UT{ 1 } << bits) - 1 > : ~UT{ 0 }; > > Same below. I caved and just followed the formatter style. I wanted checkpatch clean. Changed back ... we need to fund a 3 year research project on how to make clang-format know how we like to format things.... > > > + > > +public: > > + using QuantizedType = UT; > > + > > + static constexpr UT qMin = std::is_signed_v<T> > > + ? -(UT{ 1 } << (bits - 1)) > > + : 0; > > + > > + static constexpr UT qMax = std::is_signed_v<T> > > + ? 
(UT{ 1 } << (bits - 1)) - 1 > > + : bitMask; > > + > > + static constexpr float toFloat(QuantizedType q) > > + { > > + return fixedToFloatingPoint<I, F, float, T>(q); > > + } > > + > > + static constexpr float min = fixedToFloatingPoint<I, F, float>(static_cast<T>(qMin)); > > + static constexpr float max = fixedToFloatingPoint<I, F, float>(static_cast<T>(qMax)); > > + > > + static_assert(min < max, "FixedPointQTraits: Minimum must be less than maximum"); > > + > > + /* Conversion functions required by Quantized<Traits> */ > > + static QuantizedType fromFloat(float v) > > You've dropped the constexpr here, was that intentional ? I recall that being from a review from Barnabas ? digging. V4: - Make toFloat and fromFloat constexpr v5: - Remove constexpr from fromFloat which uses std::round (only constexpr in C++23) So - yes, intentional. > > Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com> > > > + { > > + v = std::clamp(v, min, max); > > + return floatingToFixedPoint<I, F, T, float>(v); > > + } > > +}; > > + > > +namespace details { > > + > > +template<unsigned int Bits> > > +constexpr auto qtype() > > +{ > > + static_assert(Bits <= 32, > > + "Unsupported number of bits for quantized type"); > > + > > + if constexpr (Bits <= 8) > > + return int8_t(); > > + else if constexpr (Bits <= 16) > > + return int16_t(); > > + else if constexpr (Bits <= 32) > > + return int32_t(); > > +} > > + > > +} /* namespace details */ > > + > > +template<unsigned int I, unsigned int F> > > +using Q = Quantized<FixedPointQTraits<I, F, decltype(details::qtype<I + F>())>>; > > + > > +template<unsigned int I, unsigned int F> > > +using UQ = Quantized<FixedPointQTraits<I, F, std::make_unsigned_t<decltype(details::qtype<I + F>())>>>; > > + > > } /* namespace ipa */ > > > > } /* namespace libcamera */ > > -- > Regards, > > Laurent Pinchart
diff --git a/src/ipa/libipa/fixedpoint.cpp b/src/ipa/libipa/fixedpoint.cpp index 6b698fc5d680..caa9ce0fc1ec 100644 --- a/src/ipa/libipa/fixedpoint.cpp +++ b/src/ipa/libipa/fixedpoint.cpp @@ -37,6 +37,104 @@ namespace ipa { * \return The converted value */ +/** + * \struct libcamera::ipa::FixedPointQTraits + * \brief Traits type implementing fixed-point quantisation conversions + * + * The FixedPointQTraits structure defines a policy for mapping floating-point + * values to and from fixed-point integer representations. It is parameterised + * by the number of integer bits \a I, fractional bits \a F, and the integral + * storage type \a T. The traits are used with Quantized<Traits> to create a + * quantized type that stores both the fixed-point representation and the + * corresponding floating-point value. + * + * The signedness of the type is determined by the signedness of \a T. For + * signed types, the number of integer bits in \a I includes the sign bit. + * + * Storage is determined by the total number of bits \a (I + F) and is + * automatically selected, but the internal storage type is always an unsigned + * integer to guarantee against sign extension when storing quantized values + * in registers. + * + * The trait exposes compile-time constants describing the bit layout, limits, + * and scaling factors used in the fixed-point representation. + * + * \tparam I Number of integer bits + * \tparam F Number of fractional bits + * \tparam T Integral type used to store the quantized value + */ + +/** + * \typedef FixedPointQTraits::QuantizedType + * \brief The integral storage type used for the fixed-point representation + */ + +/** + * \var FixedPointQTraits::qMin + * \brief Minimum representable quantized integer value + * + * This corresponds to the most negative value for signed formats or zero for + * unsigned formats. 
+ */ + +/** + * \var FixedPointQTraits::qMax + * \brief Maximum representable quantized integer value + */ + +/** + * \var FixedPointQTraits::min + * \brief Minimum representable floating-point value corresponding to qMin + */ + +/** + * \var FixedPointQTraits::max + * \brief Maximum representable floating-point value corresponding to qMax + */ + +/** + * \fn FixedPointQTraits::fromFloat(float v) + * \brief Convert a floating-point value to a fixed-point integer + * \param[in] v The floating-point value to be converted + * \return The quantized fixed-point integer representation + * + * The conversion first clamps the floating-point input \a v to the range [min, + * max] and then rounds it to the nearest integer according to the scaling + * factor defined by the number of fractional bits F. + */ + +/** + * \fn FixedPointQTraits::toFloat(QuantizedType q) + * \brief Convert a fixed-point integer to a floating-point value + * \param[in] q The fixed-point integer value to be converted + * \return The corresponding floating-point value + * + * The conversion sign-extends the integer value if required and divides by the + * scaling factor defined by the number of fractional bits F. + */ + +/** + * \typedef Q + * \brief Define a signed fixed-point quantized type with automatic storage width + * \tparam I The number of integer bits + * \tparam F The number of fractional bits + * + * This alias defines a signed fixed-point quantized type using the + * \ref FixedPointQTraits trait and a suitable signed integer storage type + * automatically selected based on the total number of bits \a (I + F). 
+ */ + +/** + * \typedef UQ + * \brief Define an unsigned fixed-point quantized type with automatic storage width + * \tparam I The number of integer bits + * \tparam F The number of fractional bits + * + * This alias defines an unsigned fixed-point quantized type using the + * \ref FixedPointQTraits trait and a suitable unsigned integer storage type + * automatically selected based on the total number of bits \a (I + F). + */ + } /* namespace ipa */ } /* namespace libcamera */ diff --git a/src/ipa/libipa/fixedpoint.h b/src/ipa/libipa/fixedpoint.h index aeb9bce3269b..b6b611df7fc3 100644 --- a/src/ipa/libipa/fixedpoint.h +++ b/src/ipa/libipa/fixedpoint.h @@ -10,6 +10,8 @@ #include <cmath> #include <type_traits> +#include "quantized.h" + namespace libcamera { namespace ipa { @@ -63,6 +65,78 @@ constexpr R fixedToFloatingPoint(T number) return static_cast<R>(t) / static_cast<R>(1 << F); } +template<unsigned int I, unsigned int F, typename T> +struct FixedPointQTraits { +private: + static_assert(std::is_integral_v<T>, "FixedPointQTraits: T must be integral"); + using UT = std::make_unsigned_t<T>; + + static constexpr unsigned int bits = I + F; + static_assert(bits <= sizeof(UT) * 8, "FixedPointQTraits: too many bits for type UT"); + + /* + * If fixed point storage is required with more than 24 bits, consider + * updating this implementation to use double-precision floating point. + */ + static_assert(bits <= 24, "Floating point precision may be insufficient for more than 24 bits"); + + static constexpr UT bitMask = bits < sizeof(UT) * 8 + ? (UT{ 1 } << bits) - 1 + : ~UT{ 0 }; + +public: + using QuantizedType = UT; + + static constexpr UT qMin = std::is_signed_v<T> + ? -(UT{ 1 } << (bits - 1)) + : 0; + + static constexpr UT qMax = std::is_signed_v<T> + ? 
(UT{ 1 } << (bits - 1)) - 1 + : bitMask; + + static constexpr float toFloat(QuantizedType q) + { + return fixedToFloatingPoint<I, F, float, T>(q); + } + + static constexpr float min = fixedToFloatingPoint<I, F, float>(static_cast<T>(qMin)); + static constexpr float max = fixedToFloatingPoint<I, F, float>(static_cast<T>(qMax)); + + static_assert(min < max, "FixedPointQTraits: Minimum must be less than maximum"); + + /* Conversion functions required by Quantized<Traits> */ + static QuantizedType fromFloat(float v) + { + v = std::clamp(v, min, max); + return floatingToFixedPoint<I, F, T, float>(v); + } +}; + +namespace details { + +template<unsigned int Bits> +constexpr auto qtype() +{ + static_assert(Bits <= 32, + "Unsupported number of bits for quantized type"); + + if constexpr (Bits <= 8) + return int8_t(); + else if constexpr (Bits <= 16) + return int16_t(); + else if constexpr (Bits <= 32) + return int32_t(); +} + +} /* namespace details */ + +template<unsigned int I, unsigned int F> +using Q = Quantized<FixedPointQTraits<I, F, decltype(details::qtype<I + F>())>>; + +template<unsigned int I, unsigned int F> +using UQ = Quantized<FixedPointQTraits<I, F, std::make_unsigned_t<decltype(details::qtype<I + F>())>>>; + } /* namespace ipa */ } /* namespace libcamera */