| Message ID | 20260114173918.1744023-5-kieran.bingham@ideasonboard.com |
|---|---|
| State | Superseded |
| Headers | show |
| Series | |
| Related | show |
2026. 01. 14. 18:39 keltezéssel, Kieran Bingham írta: > Extend the new Quantized type infrastructure by providing a > FixedPointQTraits template. > > This allows construction of fixed point types with a Quantized storage > that allows easy reading of both the underlying quantized type value and > a floating point representation of that same value. > > Reviewed-by: Isaac Scott <isaac.scott@ideasonboard.com> > Signed-off-by: Kieran Bingham <kieran.bingham@ideasonboard.com> > > --- > v4: > - Assert that the given type has enough bits for the usage > - Use unsigned types for calculating qmin/qmax > - Reorder toFloat/fromFloat and min/max for future inlining > - Make toFloat and fromFloat constexpr > > v5: > - Make UT, Bits and Bitmask private (and remove doxygen) > - Remove constexpr from fromFloat which uses std::round (only constexpr > in C++23) > - static_assert that min<max when converted > - Provide new Q and UQ automatic width types (Thanks Barnabás) > - Convert types to shortened Q/UQ automatic widths > - Use automatic width Q/UQ for 12,4 > - change qmin->qMin qmax->qMax Bits->bits BitMask->bitMask > - Remove typedefs for Q1_7 etc > > Signed-off-by: Kieran Bingham <kieran.bingham@ideasonboard.com> > --- > src/ipa/libipa/fixedpoint.cpp | 89 +++++++++++++++++++++++++++++++++++ > src/ipa/libipa/fixedpoint.h | 69 +++++++++++++++++++++++++++ > 2 files changed, 158 insertions(+) > > diff --git a/src/ipa/libipa/fixedpoint.cpp b/src/ipa/libipa/fixedpoint.cpp > index 6b698fc5d680..43d76f745d8a 100644 > --- a/src/ipa/libipa/fixedpoint.cpp > +++ b/src/ipa/libipa/fixedpoint.cpp > @@ -37,6 +37,95 @@ namespace ipa { > * \return The converted value > */ > > +/** > + * \struct libcamera::ipa::FixedPointQTraits > + * \brief Traits type implementing fixed-point quantisation conversions In a different commit you used "quanti[z]...", I think consistency would be better. 
> + * > + * The FixedPointQTraits structure defines a policy for mapping floating-point > + * values to and from fixed-point integer representations. It is parameterised > + * by the number of integer bits \a I, fractional bits \a F, and the integral > + * storage type \a T. The traits are used with Quantized<Traits> to create a > + * quantised type that stores both the fixed-point representation and the > + * corresponding floating-point value. > + * > + * The trait exposes compile-time constants describing the bit layout, limits, > + * and scaling factors used in the fixed-point representation. "The sign of the value is determined by the sign of \a T." I would add something like this. > + * > + * \tparam I Number of integer bits > + * \tparam F Number of fractional bits > + * \tparam T Integral type used to store the quantised value > + */ > + > +/** > + * \typedef FixedPointQTraits::QuantizedType > + * \brief The integral storage type used for the fixed-point representation > + */ > + > +/** > + * \var FixedPointQTraits::qMin > + * \brief Minimum representable quantised integer value > + * > + * This corresponds to the most negative value for signed formats or zero for > + * unsigned formats. 
> + */ > + > +/** > + * \var FixedPointQTraits::qMax > + * \brief Maximum representable quantised integer value > + */ > + > +/** > + * \var FixedPointQTraits::min > + * \brief Minimum representable floating-point value corresponding to qMin > + */ > + > +/** > + * \var FixedPointQTraits::max > + * \brief Maximum representable floating-point value corresponding to qMax > + */ > + > +/** > + * \fn FixedPointQTraits::fromFloat(float v) > + * \brief Convert a floating-point value to a fixed-point integer > + * \param[in] v The floating-point value to be converted > + * \return The quantised fixed-point integer representation > + * > + * The conversion rounds the floating-point input \a v to the nearest integer > + * according to the scaling factor defined by the number of fractional bits F. I think it's worth mentioning that `v` is clamped first. > + */ > + > +/** > + * \fn FixedPointQTraits::toFloat(QuantizedType q) > + * \brief Convert a fixed-point integer to a floating-point value > + * \param[in] q The fixed-point integer value to be converted > + * \return The corresponding floating-point value > + * > + * The conversion sign-extends the integer value if required and divides by the > + * scaling factor defined by the number of fractional bits F. > + */ > + > +/** > + * \typedef Q > + * \brief Define a signed fixed-point quantised type with automatic storage width > + * \tparam I The number of integer bits > + * \tparam F The number of fractional bits > + * > + * This alias defines a signed fixed-point quantised type using the > + * \ref FixedPointQTraits trait and a suitable signed integer storage type > + * automatically selected based on the total number of bits \a (I + F). 
> + */ > + > +/** > + * \typedef UQ > + * \brief Define an unsigned fixed-point quantised type with automatic storage width > + * \tparam I The number of integer bits > + * \tparam F The number of fractional bits > + * > + * This alias defines an unsigned fixed-point quantised type using the > + * \ref FixedPointQTraits trait and a suitable unsigned integer storage type > + * automatically selected based on the total number of bits \a (I + F). > + */ > + > } /* namespace ipa */ > > } /* namespace libcamera */ > diff --git a/src/ipa/libipa/fixedpoint.h b/src/ipa/libipa/fixedpoint.h > index b4a7fa5e0ecd..4f6ee081604b 100644 > --- a/src/ipa/libipa/fixedpoint.h > +++ b/src/ipa/libipa/fixedpoint.h > @@ -10,6 +10,8 @@ > #include <cmath> > #include <type_traits> > > +#include "quantized.h" > + > namespace libcamera { > > namespace ipa { > @@ -63,6 +65,73 @@ constexpr R fixedToFloatingPoint(T number) > return static_cast<R>(t) / static_cast<R>(1 << F); > } > > +template<unsigned int I, unsigned int F, typename T> > +struct FixedPointQTraits { > +private: > + static_assert(std::is_integral_v<T>, "FixedPointQTraits: T must be integral"); > + using UT = std::make_unsigned_t<T>; > + > + static constexpr unsigned int bits = I + F; > + static_assert(bits <= sizeof(T) * 8, "FixedPointQTraits: too many bits for type T"); > + > + static constexpr T bitMask = (bits < sizeof(T) * 8) > + ? static_cast<T>((UT{1} << bits) - 1) > + : static_cast<T>(~UT{0}); I think `static_cast<T>((UT{1} << bits) - 1)` should work in every case. If `I+F` is the full width, then `(UT{1} << bits) == 0`, subtracting one yields `~UT{0}`. (Unless the usual integer promotions apply, but then `bits` is less than the width of promoted-to type, so no overflow/wraparound is possible as far as I can tell.) > + > +public: > + using QuantizedType = T; > + > + static constexpr T qMin = std::is_signed_v<T> > + ? 
static_cast<T>(-(UT{1} << (bits - 1))) > + : static_cast<T>(0); > + > + static constexpr T qMax = std::is_signed_v<T> > + ? static_cast<T>((UT{1} << (bits - 1)) - 1) > + : static_cast<T>((UT{1} << bits) - 1); > + > + static constexpr float toFloat(QuantizedType q) > + { > + return fixedToFloatingPoint<I, F, float, QuantizedType>(q); > + } > + > + static constexpr float min = fixedToFloatingPoint<I, F, float>(qMin); > + static constexpr float max = fixedToFloatingPoint<I, F, float>(qMax); > + > + static_assert(min < max, "FixedPointQTraits: Minimum must be less than maximum"); > + > + /* Conversion functions required by Quantized<Traits> */ > + static QuantizedType fromFloat(float v) > + { > + v = std::clamp(v, min, max); > + return floatingToFixedPoint<I, F, QuantizedType, float>(v); > + } > +}; > + > +namespace details { > + > +template<unsigned int Bits> > +constexpr auto qtype() > +{ > + static_assert(Bits <= 64); > + > + if constexpr (Bits <= 8) > + return int8_t(); > + else if constexpr (Bits <= 16) > + return int16_t(); > + else if constexpr (Bits <= 32) > + return int32_t(); > + else if constexpr (Bits <= 64) > + return int64_t(); > +} > + > +} /* namespace details */ > + > +template<unsigned int I, unsigned int F> > +using Q = Quantized<FixedPointQTraits<I, F, decltype(details::qtype<I + F>())>>; > + > +template<unsigned int I, unsigned int F> > +using UQ = Quantized<FixedPointQTraits<I, F, std::make_unsigned_t<decltype(details::qtype<I + F>())>>>; > + > } /* namespace ipa */ > > } /* namespace libcamera */
Quoting Barnabás Pőcze (2026-01-15 16:28:45) > 2026. 01. 14. 18:39 keltezéssel, Kieran Bingham írta: > > Extend the new Quantized type infrastructure by providing a > > FixedPointQTraits template. > > > > This allows construction of fixed point types with a Quantized storage > > that allows easy reading of both the underlying quantized type value and > > a floating point representation of that same value. > > > > Reviewed-by: Isaac Scott <isaac.scott@ideasonboard.com> > > Signed-off-by: Kieran Bingham <kieran.bingham@ideasonboard.com> > > > > --- > > v4: > > - Assert that the given type has enough bits for the usage > > - Use unsigned types for calculating qmin/qmax > > - Reorder toFloat/fromFloat and min/max for future inlining > > - Make toFloat and fromFloat constexpr > > > > v5: > > - Make UT, Bits and Bitmask private (and remove doxygen) > > - Remove constexpr from fromFloat which uses std::round (only constexpr > > in C++23) > > - static_assert that min<max when converted > > - Provide new Q and UQ automatic width types (Thanks Barnabás) > > - Convert types to shortened Q/UQ automatic widths > > - Use automatic width Q/UQ for 12,4 > > - change qmin->qMin qmax->qMax Bits->bits BitMask->bitMask > > - Remove typedefs for Q1_7 etc > > > > Signed-off-by: Kieran Bingham <kieran.bingham@ideasonboard.com> > > --- > > src/ipa/libipa/fixedpoint.cpp | 89 +++++++++++++++++++++++++++++++++++ > > src/ipa/libipa/fixedpoint.h | 69 +++++++++++++++++++++++++++ > > 2 files changed, 158 insertions(+) > > > > diff --git a/src/ipa/libipa/fixedpoint.cpp b/src/ipa/libipa/fixedpoint.cpp > > index 6b698fc5d680..43d76f745d8a 100644 > > --- a/src/ipa/libipa/fixedpoint.cpp > > +++ b/src/ipa/libipa/fixedpoint.cpp > > @@ -37,6 +37,95 @@ namespace ipa { > > * \return The converted value > > */ > > > > +/** > > + * \struct libcamera::ipa::FixedPointQTraits > > + * \brief Traits type implementing fixed-point quantisation conversions > > In a different commit you used "quanti[z]...", I think 
consistency would be better. > > > > + * > > + * The FixedPointQTraits structure defines a policy for mapping floating-point > > + * values to and from fixed-point integer representations. It is parameterised > > + * by the number of integer bits \a I, fractional bits \a F, and the integral > > + * storage type \a T. The traits are used with Quantized<Traits> to create a > > + * quantised type that stores both the fixed-point representation and the > > + * corresponding floating-point value. > > + * > > + * The trait exposes compile-time constants describing the bit layout, limits, > > + * and scaling factors used in the fixed-point representation. > > "The sign of the value is determined by the sign of \a T." > > I would add something like this. > Also confirmed that the I contains the sign bit for signed types: + * The sign of the value is determined by the sign of \a T. For signed types, + * the number of integer bits in \a I includes the sign bit. > > + * > > + * \tparam I Number of integer bits > > + * \tparam F Number of fractional bits > > + * \tparam T Integral type used to store the quantised value > > + */ > > + > > +/** > > + * \typedef FixedPointQTraits::QuantizedType > > + * \brief The integral storage type used for the fixed-point representation > > + */ > > + > > +/** > > + * \var FixedPointQTraits::qMin > > + * \brief Minimum representable quantised integer value > > + * > > + * This corresponds to the most negative value for signed formats or zero for > > + * unsigned formats. 
> > + */ > > + > > +/** > > + * \var FixedPointQTraits::qMax > > + * \brief Maximum representable quantised integer value > > + */ > > + > > +/** > > + * \var FixedPointQTraits::min > > + * \brief Minimum representable floating-point value corresponding to qMin > > + */ > > + > > +/** > > + * \var FixedPointQTraits::max > > + * \brief Maximum representable floating-point value corresponding to qMax > > + */ > > + > > +/** > > + * \fn FixedPointQTraits::fromFloat(float v) > > + * \brief Convert a floating-point value to a fixed-point integer > > + * \param[in] v The floating-point value to be converted > > + * \return The quantised fixed-point integer representation > > + * > > + * The conversion rounds the floating-point input \a v to the nearest integer > > + * according to the scaling factor defined by the number of fractional bits F. > > I think it's worth mentioning that `v` is clamped first. Added. - * The conversion rounds the floating-point input \a v to the nearest integer - * according to the scaling factor defined by the number of fractional bits F. + * The conversion first clamps the floating-point input \a v to the range [min, + * max] and then rounds it to the nearest integer according to the scaling + * factor defined by the number of fractional bits F. > > > > + */ > > + > > +/** > > + * \fn FixedPointQTraits::toFloat(QuantizedType q) > > + * \brief Convert a fixed-point integer to a floating-point value > > + * \param[in] q The fixed-point integer value to be converted > > + * \return The corresponding floating-point value > > + * > > + * The conversion sign-extends the integer value if required and divides by the > > + * scaling factor defined by the number of fractional bits F. 
> > + */ > > + > > +/** > > + * \typedef Q > > + * \brief Define a signed fixed-point quantised type with automatic storage width > > + * \tparam I The number of integer bits > > + * \tparam F The number of fractional bits > > + * > > + * This alias defines a signed fixed-point quantised type using the > > + * \ref FixedPointQTraits trait and a suitable signed integer storage type > > + * automatically selected based on the total number of bits \a (I + F). > > + */ > > + > > +/** > > + * \typedef UQ > > + * \brief Define an unsigned fixed-point quantised type with automatic storage width > > + * \tparam I The number of integer bits > > + * \tparam F The number of fractional bits > > + * > > + * This alias defines an unsigned fixed-point quantised type using the > > + * \ref FixedPointQTraits trait and a suitable unsigned integer storage type > > + * automatically selected based on the total number of bits \a (I + F). > > + */ > > + > > } /* namespace ipa */ > > > > } /* namespace libcamera */ > > diff --git a/src/ipa/libipa/fixedpoint.h b/src/ipa/libipa/fixedpoint.h > > index b4a7fa5e0ecd..4f6ee081604b 100644 > > --- a/src/ipa/libipa/fixedpoint.h > > +++ b/src/ipa/libipa/fixedpoint.h > > @@ -10,6 +10,8 @@ > > #include <cmath> > > #include <type_traits> > > > > +#include "quantized.h" > > + > > namespace libcamera { > > > > namespace ipa { > > @@ -63,6 +65,73 @@ constexpr R fixedToFloatingPoint(T number) > > return static_cast<R>(t) / static_cast<R>(1 << F); > > } > > > > +template<unsigned int I, unsigned int F, typename T> > > +struct FixedPointQTraits { > > +private: > > + static_assert(std::is_integral_v<T>, "FixedPointQTraits: T must be integral"); > > + using UT = std::make_unsigned_t<T>; > > + > > + static constexpr unsigned int bits = I + F; > > + static_assert(bits <= sizeof(T) * 8, "FixedPointQTraits: too many bits for type T"); > > + > > + static constexpr T bitMask = (bits < sizeof(T) * 8) > > + ? 
static_cast<T>((UT{1} << bits) - 1) > > + : static_cast<T>(~UT{0}); > > I think `static_cast<T>((UT{1} << bits) - 1)` should work in every case. > If `I+F` is the full width, then `(UT{1} << bits) == 0`, subtracting one > yields `~UT{0}`. (Unless the usual integer promotions apply, but then `bits` > is less than the width of promoted-to type, so no overflow/wraparound is possible > as far as I can tell.) It was so long ago - but I went through so much compiler matrix pain here with compiler warnings with different things I tried. But ultimately - this works around/supports using the maximum size without hitting this: https://godbolt.org/z/W7sE4s8oc #include <cstdint> #include <iostream> int main() { uint32_t bits = 32; uint32_t x = uint32_t{1} << bits; std::cout << x << "\n"; } with -std=c++17 -Wall -Werror -fsanitize=undefined Program returned: 0 /app/example.cpp:8:30: runtime error: shift exponent 32 is too large for 32-bit type 'uint32_t' (aka 'unsigned int') SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior /app/example.cpp:8:30 1 So I don't think I can simplify here. > > > > + > > +public: > > + using QuantizedType = T; > > + > > + static constexpr T qMin = std::is_signed_v<T> > > + ? static_cast<T>(-(UT{1} << (bits - 1))) > > + : static_cast<T>(0); > > + > > + static constexpr T qMax = std::is_signed_v<T> > > + ? 
static_cast<T>((UT{1} << (bits - 1)) - 1) > > + : static_cast<T>((UT{1} << bits) - 1); > > + > > + static constexpr float toFloat(QuantizedType q) > > + { > > + return fixedToFloatingPoint<I, F, float, QuantizedType>(q); > > + } > > + > > + static constexpr float min = fixedToFloatingPoint<I, F, float>(qMin); > > + static constexpr float max = fixedToFloatingPoint<I, F, float>(qMax); > > + > > + static_assert(min < max, "FixedPointQTraits: Minimum must be less than maximum"); > > + > > + /* Conversion functions required by Quantized<Traits> */ > > + static QuantizedType fromFloat(float v) > > + { > > + v = std::clamp(v, min, max); > > + return floatingToFixedPoint<I, F, QuantizedType, float>(v); > > + } > > +}; > > + > > +namespace details { > > + > > +template<unsigned int Bits> > > +constexpr auto qtype() > > +{ > > + static_assert(Bits <= 64); > > + > > + if constexpr (Bits <= 8) > > + return int8_t(); > > + else if constexpr (Bits <= 16) > > + return int16_t(); > > + else if constexpr (Bits <= 32) > > + return int32_t(); > > + else if constexpr (Bits <= 64) > > + return int64_t(); > > +} > > + > > +} /* namespace details */ > > + > > +template<unsigned int I, unsigned int F> > > +using Q = Quantized<FixedPointQTraits<I, F, decltype(details::qtype<I + F>())>>; > > + > > +template<unsigned int I, unsigned int F> > > +using UQ = Quantized<FixedPointQTraits<I, F, std::make_unsigned_t<decltype(details::qtype<I + F>())>>>; > > + > > } /* namespace ipa */ > > > > } /* namespace libcamera */ >
2026. 01. 21. 13:52 keltezéssel, Kieran Bingham írta: > Quoting Barnabás Pőcze (2026-01-15 16:28:45) >> 2026. 01. 14. 18:39 keltezéssel, Kieran Bingham írta: >>> Extend the new Quantized type infrastructure by providing a >>> FixedPointQTraits template. >>> >>> This allows construction of fixed point types with a Quantized storage >>> that allows easy reading of both the underlying quantized type value and >>> a floating point representation of that same value. >>> >>> Reviewed-by: Isaac Scott <isaac.scott@ideasonboard.com> >>> Signed-off-by: Kieran Bingham <kieran.bingham@ideasonboard.com> >>> >>> --- >>> v4: >>> - Assert that the given type has enough bits for the usage >>> - Use unsigned types for calculating qmin/qmax >>> - Reorder toFloat/fromFloat and min/max for future inlining >>> - Make toFloat and fromFloat constexpr >>> >>> v5: >>> - Make UT, Bits and Bitmask private (and remove doxygen) >>> - Remove constexpr from fromFloat which uses std::round (only constexpr >>> in C++23) >>> - static_assert that min<max when converted >>> - Provide new Q and UQ automatic width types (Thanks Barnabás) >>> - Convert types to shortened Q/UQ automatic widths >>> - Use automatic width Q/UQ for 12,4 >>> - change qmin->qMin qmax->qMax Bits->bits BitMask->bitMask >>> - Remove typedefs for Q1_7 etc >>> >>> Signed-off-by: Kieran Bingham <kieran.bingham@ideasonboard.com> >>> --- >>> src/ipa/libipa/fixedpoint.cpp | 89 +++++++++++++++++++++++++++++++++++ >>> src/ipa/libipa/fixedpoint.h | 69 +++++++++++++++++++++++++++ >>> 2 files changed, 158 insertions(+) >>> >>> diff --git a/src/ipa/libipa/fixedpoint.cpp b/src/ipa/libipa/fixedpoint.cpp >>> index 6b698fc5d680..43d76f745d8a 100644 >>> --- a/src/ipa/libipa/fixedpoint.cpp >>> +++ b/src/ipa/libipa/fixedpoint.cpp >>> @@ -37,6 +37,95 @@ namespace ipa { >>> * \return The converted value >>> */ >>> >>> +/** >>> + * \struct libcamera::ipa::FixedPointQTraits >>> + * \brief Traits type implementing fixed-point quantisation conversions >> 
>> In a different commit you used "quanti[z]...", I think consistency would be better. >> >> >>> + * >>> + * The FixedPointQTraits structure defines a policy for mapping floating-point >>> + * values to and from fixed-point integer representations. It is parameterised >>> + * by the number of integer bits \a I, fractional bits \a F, and the integral >>> + * storage type \a T. The traits are used with Quantized<Traits> to create a >>> + * quantised type that stores both the fixed-point representation and the >>> + * corresponding floating-point value. >>> + * >>> + * The trait exposes compile-time constants describing the bit layout, limits, >>> + * and scaling factors used in the fixed-point representation. >> >> "The sign of the value is determined by the sign of \a T." >> >> I would add something like this. >> > > Also confirmed that the I contains the sign bit for signed types: > > + * The sign of the value is determined by the sign of \a T. For signed types, > + * the number of integer bits in \a I includes the sign bit. > > > >>> + * >>> + * \tparam I Number of integer bits >>> + * \tparam F Number of fractional bits >>> + * \tparam T Integral type used to store the quantised value >>> + */ >>> + >>> +/** >>> + * \typedef FixedPointQTraits::QuantizedType >>> + * \brief The integral storage type used for the fixed-point representation >>> + */ >>> + >>> +/** >>> + * \var FixedPointQTraits::qMin >>> + * \brief Minimum representable quantised integer value >>> + * >>> + * This corresponds to the most negative value for signed formats or zero for >>> + * unsigned formats. 
>>> + */ >>> + >>> +/** >>> + * \var FixedPointQTraits::qMax >>> + * \brief Maximum representable quantised integer value >>> + */ >>> + >>> +/** >>> + * \var FixedPointQTraits::min >>> + * \brief Minimum representable floating-point value corresponding to qMin >>> + */ >>> + >>> +/** >>> + * \var FixedPointQTraits::max >>> + * \brief Maximum representable floating-point value corresponding to qMax >>> + */ >>> + >>> +/** >>> + * \fn FixedPointQTraits::fromFloat(float v) >>> + * \brief Convert a floating-point value to a fixed-point integer >>> + * \param[in] v The floating-point value to be converted >>> + * \return The quantised fixed-point integer representation >>> + * >>> + * The conversion rounds the floating-point input \a v to the nearest integer >>> + * according to the scaling factor defined by the number of fractional bits F. >> >> I think it's worth mentioning that `v` is clamped first. > > Added. > > > - * The conversion rounds the floating-point input \a v to the nearest integer > - * according to the scaling factor defined by the number of fractional bits F. > + * The conversion first clamps the floating-point input \a v to the range [min, > + * max] and then rounds it to the nearest integer according to the scaling > + * factor defined by the number of fractional bits F. > > >> >> >>> + */ >>> + >>> +/** >>> + * \fn FixedPointQTraits::toFloat(QuantizedType q) >>> + * \brief Convert a fixed-point integer to a floating-point value >>> + * \param[in] q The fixed-point integer value to be converted >>> + * \return The corresponding floating-point value >>> + * >>> + * The conversion sign-extends the integer value if required and divides by the >>> + * scaling factor defined by the number of fractional bits F. 
>>> + */ >>> + >>> +/** >>> + * \typedef Q >>> + * \brief Define a signed fixed-point quantised type with automatic storage width >>> + * \tparam I The number of integer bits >>> + * \tparam F The number of fractional bits >>> + * >>> + * This alias defines a signed fixed-point quantised type using the >>> + * \ref FixedPointQTraits trait and a suitable signed integer storage type >>> + * automatically selected based on the total number of bits \a (I + F). >>> + */ >>> + >>> +/** >>> + * \typedef UQ >>> + * \brief Define an unsigned fixed-point quantised type with automatic storage width >>> + * \tparam I The number of integer bits >>> + * \tparam F The number of fractional bits >>> + * >>> + * This alias defines an unsigned fixed-point quantised type using the >>> + * \ref FixedPointQTraits trait and a suitable unsigned integer storage type >>> + * automatically selected based on the total number of bits \a (I + F). >>> + */ >>> + >>> } /* namespace ipa */ >>> >>> } /* namespace libcamera */ >>> diff --git a/src/ipa/libipa/fixedpoint.h b/src/ipa/libipa/fixedpoint.h >>> index b4a7fa5e0ecd..4f6ee081604b 100644 >>> --- a/src/ipa/libipa/fixedpoint.h >>> +++ b/src/ipa/libipa/fixedpoint.h >>> @@ -10,6 +10,8 @@ >>> #include <cmath> >>> #include <type_traits> >>> >>> +#include "quantized.h" >>> + >>> namespace libcamera { >>> >>> namespace ipa { >>> @@ -63,6 +65,73 @@ constexpr R fixedToFloatingPoint(T number) >>> return static_cast<R>(t) / static_cast<R>(1 << F); >>> } >>> >>> +template<unsigned int I, unsigned int F, typename T> >>> +struct FixedPointQTraits { >>> +private: >>> + static_assert(std::is_integral_v<T>, "FixedPointQTraits: T must be integral"); >>> + using UT = std::make_unsigned_t<T>; >>> + >>> + static constexpr unsigned int bits = I + F; >>> + static_assert(bits <= sizeof(T) * 8, "FixedPointQTraits: too many bits for type T"); >>> + >>> + static constexpr T bitMask = (bits < sizeof(T) * 8) >>> + ? 
static_cast<T>((UT{1} << bits) - 1) >>> + : static_cast<T>(~UT{0}); >> >> I think `static_cast<T>((UT{1} << bits) - 1)` should work in every case. >> If `I+F` is the full width, then `(UT{1} << bits) == 0`, subtracting one >> yields `~UT{0}`. (Unless the usual integer promotions apply, but then `bits` >> is less than the width of promoted-to type, so no overflow/wraparound is possible >> as far as I can tell.) > > It was so long ago - but I went through so much compiler matrix pain > here with compiler warnings with different things I tried. > > But ultimately - this works around/supports using the maximum size > without hitting this: > > > https://godbolt.org/z/W7sE4s8oc > > #include <cstdint> > #include <iostream> > > int main() > { > uint32_t bits = 32; > > uint32_t x = uint32_t{1} << bits; > > std::cout << x << "\n"; > } > > with -std=c++17 -Wall -Werror -fsanitize=undefined > > Program returned: 0 > /app/example.cpp:8:30: runtime error: shift exponent 32 is too large for > 32-bit type 'uint32_t' (aka 'unsigned int') > SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior > /app/example.cpp:8:30 > 1 > > So I don't think I can simplify here. Ahh, indeed, sorry. I was so fixated on the left operand that I missed that the right operand must not be greater than or equal to the bit width of the left operand. > > >> >> >>> + >>> +public: >>> + using QuantizedType = T; >>> + >>> + static constexpr T qMin = std::is_signed_v<T> >>> + ? static_cast<T>(-(UT{1} << (bits - 1))) >>> + : static_cast<T>(0); >>> + >>> + static constexpr T qMax = std::is_signed_v<T> >>> + ?
static_cast<T>((UT{1} << (bits - 1)) - 1) >>> + : static_cast<T>((UT{1} << bits) - 1); >>> + >>> + static constexpr float toFloat(QuantizedType q) >>> + { >>> + return fixedToFloatingPoint<I, F, float, QuantizedType>(q); >>> + } >>> + >>> + static constexpr float min = fixedToFloatingPoint<I, F, float>(qMin); >>> + static constexpr float max = fixedToFloatingPoint<I, F, float>(qMax); >>> + >>> + static_assert(min < max, "FixedPointQTraits: Minimum must be less than maximum"); >>> + >>> + /* Conversion functions required by Quantized<Traits> */ >>> + static QuantizedType fromFloat(float v) >>> + { >>> + v = std::clamp(v, min, max); >>> + return floatingToFixedPoint<I, F, QuantizedType, float>(v); >>> + } >>> +}; >>> + >>> +namespace details { >>> + >>> +template<unsigned int Bits> >>> +constexpr auto qtype() >>> +{ >>> + static_assert(Bits <= 64); >>> + >>> + if constexpr (Bits <= 8) >>> + return int8_t(); >>> + else if constexpr (Bits <= 16) >>> + return int16_t(); >>> + else if constexpr (Bits <= 32) >>> + return int32_t(); >>> + else if constexpr (Bits <= 64) >>> + return int64_t(); >>> +} >>> + >>> +} /* namespace details */ >>> + >>> +template<unsigned int I, unsigned int F> >>> +using Q = Quantized<FixedPointQTraits<I, F, decltype(details::qtype<I + F>())>>; >>> + >>> +template<unsigned int I, unsigned int F> >>> +using UQ = Quantized<FixedPointQTraits<I, F, std::make_unsigned_t<decltype(details::qtype<I + F>())>>>; >>> + >>> } /* namespace ipa */ >>> >>> } /* namespace libcamera */ >>
On Wed, Jan 21, 2026 at 02:41:18PM +0100, Barnabás Pőcze wrote: > 2026. 01. 21. 13:52 keltezéssel, Kieran Bingham írta: > > Quoting Barnabás Pőcze (2026-01-15 16:28:45) > >> 2026. 01. 14. 18:39 keltezéssel, Kieran Bingham írta: > >>> Extend the new Quantized type infrastructure by providing a > >>> FixedPointQTraits template. > >>> > >>> This allows construction of fixed point types with a Quantized storage > >>> that allows easy reading of both the underlying quantized type value and > >>> a floating point representation of that same value. > >>> > >>> Reviewed-by: Isaac Scott <isaac.scott@ideasonboard.com> > >>> Signed-off-by: Kieran Bingham <kieran.bingham@ideasonboard.com> > >>> > >>> --- > >>> v4: > >>> - Assert that the given type has enough bits for the usage > >>> - Use unsigned types for calculating qmin/qmax > >>> - Reorder toFloat/fromFloat and min/max for future inlining > >>> - Make toFloat and fromFloat constexpr > >>> > >>> v5: > >>> - Make UT, Bits and Bitmask private (and remove doxygen) > >>> - Remove constexpr from fromFloat which uses std::round (only constexpr > >>> in C++23) > >>> - static_assert that min<max when converted > >>> - Provide new Q and UQ automatic width types (Thanks Barnabás) > >>> - Convert types to shortened Q/UQ automatic widths > >>> - Use automatic width Q/UQ for 12,4 > >>> - change qmin->qMin qmax->qMax Bits->bits BitMask->bitMask > >>> - Remove typedefs for Q1_7 etc > >>> > >>> Signed-off-by: Kieran Bingham <kieran.bingham@ideasonboard.com> > >>> --- > >>> src/ipa/libipa/fixedpoint.cpp | 89 +++++++++++++++++++++++++++++++++++ > >>> src/ipa/libipa/fixedpoint.h | 69 +++++++++++++++++++++++++++ > >>> 2 files changed, 158 insertions(+) > >>> > >>> diff --git a/src/ipa/libipa/fixedpoint.cpp b/src/ipa/libipa/fixedpoint.cpp > >>> index 6b698fc5d680..43d76f745d8a 100644 > >>> --- a/src/ipa/libipa/fixedpoint.cpp > >>> +++ b/src/ipa/libipa/fixedpoint.cpp > >>> @@ -37,6 +37,95 @@ namespace ipa { > >>> * \return The converted value 
> >>> */ > >>> > >>> +/** > >>> + * \struct libcamera::ipa::FixedPointQTraits > >>> + * \brief Traits type implementing fixed-point quantisation conversions > >> > >> In a different commit you used "quanti[z]...", I think consistency would be better. > >> > >>> + * > >>> + * The FixedPointQTraits structure defines a policy for mapping floating-point > >>> + * values to and from fixed-point integer representations. It is parameterised > >>> + * by the number of integer bits \a I, fractional bits \a F, and the integral > >>> + * storage type \a T. The traits are used with Quantized<Traits> to create a > >>> + * quantised type that stores both the fixed-point representation and the > >>> + * corresponding floating-point value. > >>> + * > >>> + * The trait exposes compile-time constants describing the bit layout, limits, > >>> + * and scaling factors used in the fixed-point representation. > >> > >> "The sign of the value is determined by the sign of \a T." > >> > >> I would add something like this. > >> > > > > Also confirmed that the I contains the sign bit for signed types: > > > > + * The sign of the value is determined by the sign of \a T. For signed types, > > + * the number of integer bits in \a I includes the sign bit. You're documenting a type, so I would write "The signedness of the type is determined by the signedness of \a T". > >>> + * > >>> + * \tparam I Number of integer bits > >>> + * \tparam F Number of fractional bits > >>> + * \tparam T Integral type used to store the quantised value > >>> + */ > >>> + > >>> +/** > >>> + * \typedef FixedPointQTraits::QuantizedType > >>> + * \brief The integral storage type used for the fixed-point representation > >>> + */ > >>> + > >>> +/** > >>> + * \var FixedPointQTraits::qMin > >>> + * \brief Minimum representable quantised integer value > >>> + * > >>> + * This corresponds to the most negative value for signed formats or zero for > >>> + * unsigned formats. 
> >>> + */ > >>> + > >>> +/** > >>> + * \var FixedPointQTraits::qMax > >>> + * \brief Maximum representable quantised integer value > >>> + */ > >>> + > >>> +/** > >>> + * \var FixedPointQTraits::min > >>> + * \brief Minimum representable floating-point value corresponding to qMin > >>> + */ > >>> + > >>> +/** > >>> + * \var FixedPointQTraits::max > >>> + * \brief Maximum representable floating-point value corresponding to qMax > >>> + */ > >>> + > >>> +/** > >>> + * \fn FixedPointQTraits::fromFloat(float v) > >>> + * \brief Convert a floating-point value to a fixed-point integer > >>> + * \param[in] v The floating-point value to be converted > >>> + * \return The quantised fixed-point integer representation > >>> + * > >>> + * The conversion rounds the floating-point input \a v to the nearest integer > >>> + * according to the scaling factor defined by the number of fractional bits F. > >> > >> I think it's worth mentioning that `v` is clamped first. > > > > Added. > > > > - * The conversion rounds the floating-point input \a v to the nearest integer > > - * according to the scaling factor defined by the number of fractional bits F. > > + * The conversion first clamps the floating-point input \a v to the range [min, > > + * max] and then rounds it to the nearest integer according to the scaling > > + * factor defined by the number of fractional bits F. > > > >>> + */ > >>> + > >>> +/** > >>> + * \fn FixedPointQTraits::toFloat(QuantizedType q) > >>> + * \brief Convert a fixed-point integer to a floating-point value > >>> + * \param[in] q The fixed-point integer value to be converted > >>> + * \return The corresponding floating-point value > >>> + * > >>> + * The conversion sign-extends the integer value if required and divides by the > >>> + * scaling factor defined by the number of fractional bits F. 
> >>> + */ > >>> + > >>> +/** > >>> + * \typedef Q > >>> + * \brief Define a signed fixed-point quantised type with automatic storage width > >>> + * \tparam I The number of integer bits > >>> + * \tparam F The number of fractional bits > >>> + * > >>> + * This alias defines a signed fixed-point quantised type using the > >>> + * \ref FixedPointQTraits trait and a suitable signed integer storage type > >>> + * automatically selected based on the total number of bits \a (I + F). > >>> + */ > >>> + > >>> +/** > >>> + * \typedef UQ > >>> + * \brief Define an unsigned fixed-point quantised type with automatic storage width > >>> + * \tparam I The number of integer bits > >>> + * \tparam F The number of fractional bits > >>> + * > >>> + * This alias defines an unsigned fixed-point quantised type using the > >>> + * \ref FixedPointQTraits trait and a suitable unsigned integer storage type > >>> + * automatically selected based on the total number of bits \a (I + F). > >>> + */ > >>> + > >>> } /* namespace ipa */ > >>> > >>> } /* namespace libcamera */ > >>> diff --git a/src/ipa/libipa/fixedpoint.h b/src/ipa/libipa/fixedpoint.h > >>> index b4a7fa5e0ecd..4f6ee081604b 100644 > >>> --- a/src/ipa/libipa/fixedpoint.h > >>> +++ b/src/ipa/libipa/fixedpoint.h > >>> @@ -10,6 +10,8 @@ > >>> #include <cmath> > >>> #include <type_traits> > >>> > >>> +#include "quantized.h" > >>> + > >>> namespace libcamera { > >>> > >>> namespace ipa { > >>> @@ -63,6 +65,73 @@ constexpr R fixedToFloatingPoint(T number) > >>> return static_cast<R>(t) / static_cast<R>(1 << F); > >>> } > >>> > >>> +template<unsigned int I, unsigned int F, typename T> > >>> +struct FixedPointQTraits { > >>> +private: > >>> + static_assert(std::is_integral_v<T>, "FixedPointQTraits: T must be integral"); > >>> + using UT = std::make_unsigned_t<T>; > >>> + > >>> + static constexpr unsigned int bits = I + F; > >>> + static_assert(bits <= sizeof(T) * 8, "FixedPointQTraits: too many bits for type T"); > >>> + > >>> + static 
constexpr T bitMask = (bits < sizeof(T) * 8) > >>> + ? static_cast<T>((UT{1} << bits) - 1) > >>> + : static_cast<T>(~UT{0}); > >> > >> I think `static_cast<T>((UT{1} << bits) - 1)` should work in every case. > >> If `I+F` is the full width, then `(UT{1} << bits) == 0`, subtracting one > >> yields `~UT{0}`. (Unless the usual integer promotions apply, but then `bits` > >> is less than the width of promoted-to type, so no overflow/wraparound is possible > >> as far as I can tell.) > > > > It was so long ago - but I went through so much compiler matrix pain > > here with compiler warnings with different things I tried. > > > > But ultimately - this works around/supports using the maximum size > > without hitting this: > > > > https://godbolt.org/z/W7sE4s8oc > > > > #include <cstdint> > > #include <iostream> > > > > int main() > > { > > uint32_t bits = 32; > > > > uint32_t x = uint32_t{1} << bits; > > > > std::cout << x << "\n"; > > } > > > > with -std=c++17 -Wall -Werror -fsanitize=undefined > > > > Program returned: 0 > > /app/example.cpp:8:30: runtime error: shift exponent 32 is too large for > > 32-bit type 'uint32_t' (aka 'unsigned int') > > SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior > > /app/example.cpp:8:30 > > 1 > > > > So I don't think I can simplify here. > > Ahh, indeed, sorry. I was so fixated on the left operand that I > missed that the right operand must not be greater than or equal to the bit > width of the left operand. > > >>> + > >>> +public: > >>> + using QuantizedType = T; > >>> + > >>> + static constexpr T qMin = std::is_signed_v<T> > >>> + ? static_cast<T>(-(UT{1} << (bits - 1))) > >>> + : static_cast<T>(0); > >>> + > >>> + static constexpr T qMax = std::is_signed_v<T> > >>> + ? 
static_cast<T>((UT{1} << (bits - 1)) - 1) > >>> + : static_cast<T>((UT{1} << bits) - 1); > >>> + > >>> + static constexpr float toFloat(QuantizedType q) > >>> + { > >>> + return fixedToFloatingPoint<I, F, float, QuantizedType>(q); > >>> + } > >>> + > >>> + static constexpr float min = fixedToFloatingPoint<I, F, float>(qMin); > >>> + static constexpr float max = fixedToFloatingPoint<I, F, float>(qMax); > >>> + > >>> + static_assert(min < max, "FixedPointQTraits: Minimum must be less than maximum"); > >>> + > >>> + /* Conversion functions required by Quantized<Traits> */ > >>> + static QuantizedType fromFloat(float v) > >>> + { > >>> + v = std::clamp(v, min, max); > >>> + return floatingToFixedPoint<I, F, QuantizedType, float>(v); > >>> + } > >>> +}; > >>> + > >>> +namespace details { > >>> + > >>> +template<unsigned int Bits> > >>> +constexpr auto qtype() > >>> +{ > >>> + static_assert(Bits <= 64); > >>> + > >>> + if constexpr (Bits <= 8) > >>> + return int8_t(); > >>> + else if constexpr (Bits <= 16) > >>> + return int16_t(); > >>> + else if constexpr (Bits <= 32) > >>> + return int32_t(); > >>> + else if constexpr (Bits <= 64) > >>> + return int64_t(); > >>> +} > >>> + > >>> +} /* namespace details */ > >>> + > >>> +template<unsigned int I, unsigned int F> > >>> +using Q = Quantized<FixedPointQTraits<I, F, decltype(details::qtype<I + F>())>>; > >>> + > >>> +template<unsigned int I, unsigned int F> > >>> +using UQ = Quantized<FixedPointQTraits<I, F, std::make_unsigned_t<decltype(details::qtype<I + F>())>>>; > >>> + > >>> } /* namespace ipa */ > >>> > >>> } /* namespace libcamera */
diff --git a/src/ipa/libipa/fixedpoint.cpp b/src/ipa/libipa/fixedpoint.cpp index 6b698fc5d680..43d76f745d8a 100644 --- a/src/ipa/libipa/fixedpoint.cpp +++ b/src/ipa/libipa/fixedpoint.cpp @@ -37,6 +37,95 @@ namespace ipa { * \return The converted value */ +/** + * \struct libcamera::ipa::FixedPointQTraits + * \brief Traits type implementing fixed-point quantisation conversions + * + * The FixedPointQTraits structure defines a policy for mapping floating-point + * values to and from fixed-point integer representations. It is parameterised + * by the number of integer bits \a I, fractional bits \a F, and the integral + * storage type \a T. The traits are used with Quantized<Traits> to create a + * quantised type that stores both the fixed-point representation and the + * corresponding floating-point value. + * + * The trait exposes compile-time constants describing the bit layout, limits, + * and scaling factors used in the fixed-point representation. + * + * \tparam I Number of integer bits + * \tparam F Number of fractional bits + * \tparam T Integral type used to store the quantised value + */ + +/** + * \typedef FixedPointQTraits::QuantizedType + * \brief The integral storage type used for the fixed-point representation + */ + +/** + * \var FixedPointQTraits::qMin + * \brief Minimum representable quantised integer value + * + * This corresponds to the most negative value for signed formats or zero for + * unsigned formats. 
+ */ + +/** + * \var FixedPointQTraits::qMax + * \brief Maximum representable quantised integer value + */ + +/** + * \var FixedPointQTraits::min + * \brief Minimum representable floating-point value corresponding to qMin + */ + +/** + * \var FixedPointQTraits::max + * \brief Maximum representable floating-point value corresponding to qMax + */ + +/** + * \fn FixedPointQTraits::fromFloat(float v) + * \brief Convert a floating-point value to a fixed-point integer + * \param[in] v The floating-point value to be converted + * \return The quantised fixed-point integer representation + * + * The conversion rounds the floating-point input \a v to the nearest integer + * according to the scaling factor defined by the number of fractional bits F. + */ + +/** + * \fn FixedPointQTraits::toFloat(QuantizedType q) + * \brief Convert a fixed-point integer to a floating-point value + * \param[in] q The fixed-point integer value to be converted + * \return The corresponding floating-point value + * + * The conversion sign-extends the integer value if required and divides by the + * scaling factor defined by the number of fractional bits F. + */ + +/** + * \typedef Q + * \brief Define a signed fixed-point quantised type with automatic storage width + * \tparam I The number of integer bits + * \tparam F The number of fractional bits + * + * This alias defines a signed fixed-point quantised type using the + * \ref FixedPointQTraits trait and a suitable signed integer storage type + * automatically selected based on the total number of bits \a (I + F). + */ + +/** + * \typedef UQ + * \brief Define an unsigned fixed-point quantised type with automatic storage width + * \tparam I The number of integer bits + * \tparam F The number of fractional bits + * + * This alias defines an unsigned fixed-point quantised type using the + * \ref FixedPointQTraits trait and a suitable unsigned integer storage type + * automatically selected based on the total number of bits \a (I + F). 
+ */ + } /* namespace ipa */ } /* namespace libcamera */ diff --git a/src/ipa/libipa/fixedpoint.h b/src/ipa/libipa/fixedpoint.h index b4a7fa5e0ecd..4f6ee081604b 100644 --- a/src/ipa/libipa/fixedpoint.h +++ b/src/ipa/libipa/fixedpoint.h @@ -10,6 +10,8 @@ #include <cmath> #include <type_traits> +#include "quantized.h" + namespace libcamera { namespace ipa { @@ -63,6 +65,73 @@ constexpr R fixedToFloatingPoint(T number) return static_cast<R>(t) / static_cast<R>(1 << F); } +template<unsigned int I, unsigned int F, typename T> +struct FixedPointQTraits { +private: + static_assert(std::is_integral_v<T>, "FixedPointQTraits: T must be integral"); + using UT = std::make_unsigned_t<T>; + + static constexpr unsigned int bits = I + F; + static_assert(bits <= sizeof(T) * 8, "FixedPointQTraits: too many bits for type T"); + + static constexpr T bitMask = (bits < sizeof(T) * 8) + ? static_cast<T>((UT{1} << bits) - 1) + : static_cast<T>(~UT{0}); + +public: + using QuantizedType = T; + + static constexpr T qMin = std::is_signed_v<T> + ? static_cast<T>(-(UT{1} << (bits - 1))) + : static_cast<T>(0); + + static constexpr T qMax = std::is_signed_v<T> + ? 
static_cast<T>((UT{1} << (bits - 1)) - 1) + : static_cast<T>((UT{1} << bits) - 1); + + static constexpr float toFloat(QuantizedType q) + { + return fixedToFloatingPoint<I, F, float, QuantizedType>(q); + } + + static constexpr float min = fixedToFloatingPoint<I, F, float>(qMin); + static constexpr float max = fixedToFloatingPoint<I, F, float>(qMax); + + static_assert(min < max, "FixedPointQTraits: Minimum must be less than maximum"); + + /* Conversion functions required by Quantized<Traits> */ + static QuantizedType fromFloat(float v) + { + v = std::clamp(v, min, max); + return floatingToFixedPoint<I, F, QuantizedType, float>(v); + } +}; + +namespace details { + +template<unsigned int Bits> +constexpr auto qtype() +{ + static_assert(Bits <= 64); + + if constexpr (Bits <= 8) + return int8_t(); + else if constexpr (Bits <= 16) + return int16_t(); + else if constexpr (Bits <= 32) + return int32_t(); + else if constexpr (Bits <= 64) + return int64_t(); +} + +} /* namespace details */ + +template<unsigned int I, unsigned int F> +using Q = Quantized<FixedPointQTraits<I, F, decltype(details::qtype<I + F>())>>; + +template<unsigned int I, unsigned int F> +using UQ = Quantized<FixedPointQTraits<I, F, std::make_unsigned_t<decltype(details::qtype<I + F>())>>>; + } /* namespace ipa */ } /* namespace libcamera */