[v4,20/21] ipa: libipa: fixedpoint: Move float conversion inline
diff mbox series

Message ID 20251114005428.90024-21-kieran.bingham@ideasonboard.com
State New
Headers show
Series
  • libipa: Introduce a Quantized type
Related show

Commit Message

Kieran Bingham Nov. 14, 2025, 12:54 a.m. UTC
With all users of the floatingToFixedPoint and fixedToFloatingPoint
calls converted to use the FixedPoint Quantized types, remove the calls
and documentation and move the implementation inline in the
FixedPointTraits implementation.

Signed-off-by: Kieran Bingham <kieran.bingham@ideasonboard.com>
---
 src/ipa/libipa/fixedpoint.cpp | 22 ----------
 src/ipa/libipa/fixedpoint.h   | 76 +++++++++++------------------------
 2 files changed, 23 insertions(+), 75 deletions(-)

Comments

Isaac Scott Nov. 18, 2025, 12:42 p.m. UTC | #1
Hi Kieran,

Thank you for the patch!

Reviewed-by: Isaac Scott <isaac.scott@ideasonboard.com>

Quoting Kieran Bingham (2025-11-14 00:54:24)
> With all users of the floatingToFixedPoint and fixedToFloatingPoint
> calls converted to use the FixedPoint Quantized types, remove the calls
> and documentation and move the implementation inline in the
> FixedPointTraits implementation.
> 
> Signed-off-by: Kieran Bingham <kieran.bingham@ideasonboard.com>
> ---
>  src/ipa/libipa/fixedpoint.cpp | 22 ----------
>  src/ipa/libipa/fixedpoint.h   | 76 +++++++++++------------------------
>  2 files changed, 23 insertions(+), 75 deletions(-)
> 
> diff --git a/src/ipa/libipa/fixedpoint.cpp b/src/ipa/libipa/fixedpoint.cpp
> index 97cdd9b3f7aa..f2c93b1cf706 100644
> --- a/src/ipa/libipa/fixedpoint.cpp
> +++ b/src/ipa/libipa/fixedpoint.cpp
> @@ -15,28 +15,6 @@ namespace libcamera {
>  
>  namespace ipa {
>  
> -/**
> - * \fn R floatingToFixedPoint(T number)
> - * \brief Convert a floating point number to a fixed-point representation
> - * \tparam I Bit width of the integer part of the fixed-point
> - * \tparam F Bit width of the fractional part of the fixed-point
> - * \tparam R Return type of the fixed-point representation
> - * \tparam T Input type of the floating point representation
> - * \param number The floating point number to convert to fixed point
> - * \return The converted value
> - */
> -
> -/**
> - * \fn R fixedToFloatingPoint(T number)
> - * \brief Convert a fixed-point number to a floating point representation
> - * \tparam I Bit width of the integer part of the fixed-point
> - * \tparam F Bit width of the fractional part of the fixed-point
> - * \tparam R Return type of the floating point representation
> - * \tparam T Input type of the fixed-point representation
> - * \param number The fixed point number to convert to floating point
> - * \return The converted value
> - */
> -
>  /**
>   * \struct libcamera::ipa::FixedPointQTraits
>   * \brief Traits type implementing fixed-point quantisation conversions
> diff --git a/src/ipa/libipa/fixedpoint.h b/src/ipa/libipa/fixedpoint.h
> index 1e984350111c..fe75af16bb54 100644
> --- a/src/ipa/libipa/fixedpoint.h
> +++ b/src/ipa/libipa/fixedpoint.h
> @@ -16,55 +16,6 @@ namespace libcamera {
>  
>  namespace ipa {
>  
> -#ifndef __DOXYGEN__
> -template<unsigned int I, unsigned int F, typename R, typename T,
> -        std::enable_if_t<std::is_integral_v<R> &&
> -                         std::is_floating_point_v<T>> * = nullptr>
> -#else
> -template<unsigned int I, unsigned int F, typename R, typename T>
> -#endif
> -constexpr R floatingToFixedPoint(T number)
> -{
> -       static_assert(sizeof(int) >= sizeof(R));
> -       static_assert(I + F <= sizeof(R) * 8);
> -
> -       /*
> -        * The intermediate cast to int is needed on arm platforms to properly
> -        * cast negative values. See
> -        * https://embeddeduse.com/2013/08/25/casting-a-negative-float-to-an-unsigned-int/
> -        */
> -       R mask = (1 << (F + I)) - 1;
> -       R frac = static_cast<R>(static_cast<int>(std::round(number * (1 << F)))) & mask;
> -
> -       return frac;
> -}
> -
> -#ifndef __DOXYGEN__
> -template<unsigned int I, unsigned int F, typename R, typename T,
> -        std::enable_if_t<std::is_floating_point_v<R> &&
> -                         std::is_integral_v<T>> * = nullptr>
> -#else
> -template<unsigned int I, unsigned int F, typename R, typename T>
> -#endif
> -constexpr R fixedToFloatingPoint(T number)
> -{
> -       static_assert(sizeof(int) >= sizeof(T));
> -       static_assert(I + F <= sizeof(T) * 8);
> -
> -       if constexpr (std::is_unsigned_v<T>)
> -               return static_cast<R>(number) / static_cast<R>(1 << F);
> -
> -       /*
> -        * Recreate the upper bits in case of a negative number by shifting the sign
> -        * bit from the fixed point to the first bit of the unsigned and then right shifting
> -        * by the same amount which keeps the sign bit in place.
> -        * This can be optimized by the compiler quite well.
> -        */
> -       int remaining_bits = sizeof(int) * 8 - (I + F);
> -       int t = static_cast<int>(static_cast<unsigned>(number) << remaining_bits) >> remaining_bits;
> -       return static_cast<R>(t) / static_cast<R>(1 << F);
> -}
> -
>  template<unsigned int I, unsigned int F, typename T>
>  struct FixedPointQTraits {
>         static_assert(std::is_integral_v<T>, "FixedPointQTraits: T must be integral");
> @@ -88,17 +39,36 @@ struct FixedPointQTraits {
>  
>         static constexpr float toFloat(QuantizedType q)
>         {
> -               return fixedToFloatingPoint<I, F, float, QuantizedType>(q);
> +               if constexpr (std::is_unsigned_v<T>)
> +                       return static_cast<float>(q) / static_cast<float>(1 << F);
> +
> +               /*
> +                * Recreate the upper bits in case of a negative number by shifting the sign
> +                * bit from the fixed point to the first bit of the unsigned and then right shifting
> +                * by the same amount which keeps the sign bit in place.
> +                * This can be optimized by the compiler quite well.
> +                */
> +               static_assert(sizeof(int) >= sizeof(T));
> +
> +               int remaining_bits = sizeof(int) * 8 - (I + F);
> +               int t = static_cast<int>(static_cast<unsigned>(q) << remaining_bits) >> remaining_bits;
> +               return static_cast<float>(t) / static_cast<float>(1 << F);
>         }
>  
> -       static constexpr float min = fixedToFloatingPoint<I, F, float>(qmin);
> -       static constexpr float max = fixedToFloatingPoint<I, F, float>(qmax);
> +       static constexpr float min = toFloat(qmin);
> +       static constexpr float max = toFloat(qmax);
>  
>         /* Conversion functions required by Quantized<Traits> */
>         static constexpr QuantizedType fromFloat(float v)
>         {
>                 v = std::clamp(v, min, max);
> -               return floatingToFixedPoint<I, F, QuantizedType, float>(v);
> +
> +               /*
> +                * The intermediate cast to int is needed on arm platforms to properly
> +                * cast negative values. See
> +                * https://embeddeduse.com/2013/08/25/casting-a-negative-float-to-an-unsigned-int/
> +                */
> +               return static_cast<T>(static_cast<int>(std::round(v * (1 << F)))) & BitMask;
>         }
>  };
>  
> -- 
> 2.51.1
>
Laurent Pinchart Nov. 19, 2025, 4:31 a.m. UTC | #2
On Fri, Nov 14, 2025 at 12:54:24AM +0000, Kieran Bingham wrote:
> With all users of the floatingToFixedPoint and fixedToFloatingPoint
> calls converted to use the FixedPoint Quantized types, remove the calls
> and documentation and move the implementation inline in the
> FixedPointTraits implementation.
> 
> Signed-off-by: Kieran Bingham <kieran.bingham@ideasonboard.com>
> ---
>  src/ipa/libipa/fixedpoint.cpp | 22 ----------
>  src/ipa/libipa/fixedpoint.h   | 76 +++++++++++------------------------
>  2 files changed, 23 insertions(+), 75 deletions(-)
> 
> diff --git a/src/ipa/libipa/fixedpoint.cpp b/src/ipa/libipa/fixedpoint.cpp
> index 97cdd9b3f7aa..f2c93b1cf706 100644
> --- a/src/ipa/libipa/fixedpoint.cpp
> +++ b/src/ipa/libipa/fixedpoint.cpp
> @@ -15,28 +15,6 @@ namespace libcamera {
>  
>  namespace ipa {
>  
> -/**
> - * \fn R floatingToFixedPoint(T number)
> - * \brief Convert a floating point number to a fixed-point representation
> - * \tparam I Bit width of the integer part of the fixed-point
> - * \tparam F Bit width of the fractional part of the fixed-point
> - * \tparam R Return type of the fixed-point representation
> - * \tparam T Input type of the floating point representation
> - * \param number The floating point number to convert to fixed point
> - * \return The converted value
> - */
> -
> -/**
> - * \fn R fixedToFloatingPoint(T number)
> - * \brief Convert a fixed-point number to a floating point representation
> - * \tparam I Bit width of the integer part of the fixed-point
> - * \tparam F Bit width of the fractional part of the fixed-point
> - * \tparam R Return type of the floating point representation
> - * \tparam T Input type of the fixed-point representation
> - * \param number The fixed point number to convert to floating point
> - * \return The converted value
> - */
> -
>  /**
>   * \struct libcamera::ipa::FixedPointQTraits
>   * \brief Traits type implementing fixed-point quantisation conversions
> diff --git a/src/ipa/libipa/fixedpoint.h b/src/ipa/libipa/fixedpoint.h
> index 1e984350111c..fe75af16bb54 100644
> --- a/src/ipa/libipa/fixedpoint.h
> +++ b/src/ipa/libipa/fixedpoint.h
> @@ -16,55 +16,6 @@ namespace libcamera {
>  
>  namespace ipa {
>  
> -#ifndef __DOXYGEN__
> -template<unsigned int I, unsigned int F, typename R, typename T,
> -	 std::enable_if_t<std::is_integral_v<R> &&
> -			  std::is_floating_point_v<T>> * = nullptr>
> -#else
> -template<unsigned int I, unsigned int F, typename R, typename T>
> -#endif
> -constexpr R floatingToFixedPoint(T number)
> -{
> -	static_assert(sizeof(int) >= sizeof(R));
> -	static_assert(I + F <= sizeof(R) * 8);
> -
> -	/*
> -	 * The intermediate cast to int is needed on arm platforms to properly
> -	 * cast negative values. See
> -	 * https://embeddeduse.com/2013/08/25/casting-a-negative-float-to-an-unsigned-int/
> -	 */
> -	R mask = (1 << (F + I)) - 1;
> -	R frac = static_cast<R>(static_cast<int>(std::round(number * (1 << F)))) & mask;
> -
> -	return frac;
> -}
> -
> -#ifndef __DOXYGEN__
> -template<unsigned int I, unsigned int F, typename R, typename T,
> -	 std::enable_if_t<std::is_floating_point_v<R> &&
> -			  std::is_integral_v<T>> * = nullptr>
> -#else
> -template<unsigned int I, unsigned int F, typename R, typename T>
> -#endif
> -constexpr R fixedToFloatingPoint(T number)
> -{
> -	static_assert(sizeof(int) >= sizeof(T));
> -	static_assert(I + F <= sizeof(T) * 8);
> -
> -	if constexpr (std::is_unsigned_v<T>)
> -		return static_cast<R>(number) / static_cast<R>(1 << F);
> -
> -	/*
> -	 * Recreate the upper bits in case of a negative number by shifting the sign
> -	 * bit from the fixed point to the first bit of the unsigned and then right shifting
> -	 * by the same amount which keeps the sign bit in place.
> -	 * This can be optimized by the compiler quite well.
> -	 */
> -	int remaining_bits = sizeof(int) * 8 - (I + F);
> -	int t = static_cast<int>(static_cast<unsigned>(number) << remaining_bits) >> remaining_bits;
> -	return static_cast<R>(t) / static_cast<R>(1 << F);
> -}
> -
>  template<unsigned int I, unsigned int F, typename T>
>  struct FixedPointQTraits {
>  	static_assert(std::is_integral_v<T>, "FixedPointQTraits: T must be integral");
> @@ -88,17 +39,36 @@ struct FixedPointQTraits {
>  
>  	static constexpr float toFloat(QuantizedType q)
>  	{
> -		return fixedToFloatingPoint<I, F, float, QuantizedType>(q);
> +		if constexpr (std::is_unsigned_v<T>)
> +			return static_cast<float>(q) / static_cast<float>(1 << F);
> +
> +		/*
> +		 * Recreate the upper bits in case of a negative number by shifting the sign
> +		 * bit from the fixed point to the first bit of the unsigned and then right shifting
> +		 * by the same amount which keeps the sign bit in place.
> +		 * This can be optimized by the compiler quite well.
> +		 */

While at it, please reflow to 80 columns.

> +		static_assert(sizeof(int) >= sizeof(T));
> +
> +		int remaining_bits = sizeof(int) * 8 - (I + F);
> +		int t = static_cast<int>(static_cast<unsigned>(q) << remaining_bits) >> remaining_bits;
> +		return static_cast<float>(t) / static_cast<float>(1 << F);
>  	}
>  
> -	static constexpr float min = fixedToFloatingPoint<I, F, float>(qmin);
> -	static constexpr float max = fixedToFloatingPoint<I, F, float>(qmax);
> +	static constexpr float min = toFloat(qmin);
> +	static constexpr float max = toFloat(qmax);
>  
>  	/* Conversion functions required by Quantized<Traits> */
>  	static constexpr QuantizedType fromFloat(float v)
>  	{
>  		v = std::clamp(v, min, max);
> -		return floatingToFixedPoint<I, F, QuantizedType, float>(v);
> +
> +		/*
> +		 * The intermediate cast to int is needed on arm platforms to properly
> +		 * cast negative values. See

Same here.

Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>

> +		 * https://embeddeduse.com/2013/08/25/casting-a-negative-float-to-an-unsigned-int/
> +		 */
> +		return static_cast<T>(static_cast<int>(std::round(v * (1 << F)))) & BitMask;
>  	}
>  };
>
Barnabás Pőcze Nov. 20, 2025, 5:26 p.m. UTC | #3
2025. 11. 14. 1:54 keltezéssel, Kieran Bingham írta:
> With all users of the floatingToFixedPoint and fixedToFloatingPoint
> calls converted to use the FixedPoint Quantized types, remove the calls
> and documentation and move the implementation inline in the
> FixedPointTraits implementation.

I'm wondering if we really want to remove them, the functions are somewhat
more flexible (e.g. allowing `double`). But arguably that is not used after
these changes.

If yes:
Reviewed-by: Barnabás Pőcze <barnabas.pocze@ideasonboard.com>

> 
> Signed-off-by: Kieran Bingham <kieran.bingham@ideasonboard.com>
> ---
>   src/ipa/libipa/fixedpoint.cpp | 22 ----------
>   src/ipa/libipa/fixedpoint.h   | 76 +++++++++++------------------------
>   2 files changed, 23 insertions(+), 75 deletions(-)
> 
> diff --git a/src/ipa/libipa/fixedpoint.cpp b/src/ipa/libipa/fixedpoint.cpp
> index 97cdd9b3f7aa..f2c93b1cf706 100644
> --- a/src/ipa/libipa/fixedpoint.cpp
> +++ b/src/ipa/libipa/fixedpoint.cpp
> @@ -15,28 +15,6 @@ namespace libcamera {
>   
>   namespace ipa {
>   
> -/**
> - * \fn R floatingToFixedPoint(T number)
> - * \brief Convert a floating point number to a fixed-point representation
> - * \tparam I Bit width of the integer part of the fixed-point
> - * \tparam F Bit width of the fractional part of the fixed-point
> - * \tparam R Return type of the fixed-point representation
> - * \tparam T Input type of the floating point representation
> - * \param number The floating point number to convert to fixed point
> - * \return The converted value
> - */
> -
> -/**
> - * \fn R fixedToFloatingPoint(T number)
> - * \brief Convert a fixed-point number to a floating point representation
> - * \tparam I Bit width of the integer part of the fixed-point
> - * \tparam F Bit width of the fractional part of the fixed-point
> - * \tparam R Return type of the floating point representation
> - * \tparam T Input type of the fixed-point representation
> - * \param number The fixed point number to convert to floating point
> - * \return The converted value
> - */
> -
>   /**
>    * \struct libcamera::ipa::FixedPointQTraits
>    * \brief Traits type implementing fixed-point quantisation conversions
> diff --git a/src/ipa/libipa/fixedpoint.h b/src/ipa/libipa/fixedpoint.h
> index 1e984350111c..fe75af16bb54 100644
> --- a/src/ipa/libipa/fixedpoint.h
> +++ b/src/ipa/libipa/fixedpoint.h
> @@ -16,55 +16,6 @@ namespace libcamera {
>   
>   namespace ipa {
>   
> -#ifndef __DOXYGEN__
> -template<unsigned int I, unsigned int F, typename R, typename T,
> -	 std::enable_if_t<std::is_integral_v<R> &&
> -			  std::is_floating_point_v<T>> * = nullptr>
> -#else
> -template<unsigned int I, unsigned int F, typename R, typename T>
> -#endif
> -constexpr R floatingToFixedPoint(T number)
> -{
> -	static_assert(sizeof(int) >= sizeof(R));
> -	static_assert(I + F <= sizeof(R) * 8);
> -
> -	/*
> -	 * The intermediate cast to int is needed on arm platforms to properly
> -	 * cast negative values. See
> -	 * https://embeddeduse.com/2013/08/25/casting-a-negative-float-to-an-unsigned-int/
> -	 */
> -	R mask = (1 << (F + I)) - 1;
> -	R frac = static_cast<R>(static_cast<int>(std::round(number * (1 << F)))) & mask;
> -
> -	return frac;
> -}
> -
> -#ifndef __DOXYGEN__
> -template<unsigned int I, unsigned int F, typename R, typename T,
> -	 std::enable_if_t<std::is_floating_point_v<R> &&
> -			  std::is_integral_v<T>> * = nullptr>
> -#else
> -template<unsigned int I, unsigned int F, typename R, typename T>
> -#endif
> -constexpr R fixedToFloatingPoint(T number)
> -{
> -	static_assert(sizeof(int) >= sizeof(T));
> -	static_assert(I + F <= sizeof(T) * 8);
> -
> -	if constexpr (std::is_unsigned_v<T>)
> -		return static_cast<R>(number) / static_cast<R>(1 << F);
> -
> -	/*
> -	 * Recreate the upper bits in case of a negative number by shifting the sign
> -	 * bit from the fixed point to the first bit of the unsigned and then right shifting
> -	 * by the same amount which keeps the sign bit in place.
> -	 * This can be optimized by the compiler quite well.
> -	 */
> -	int remaining_bits = sizeof(int) * 8 - (I + F);
> -	int t = static_cast<int>(static_cast<unsigned>(number) << remaining_bits) >> remaining_bits;
> -	return static_cast<R>(t) / static_cast<R>(1 << F);
> -}
> -
>   template<unsigned int I, unsigned int F, typename T>
>   struct FixedPointQTraits {
>   	static_assert(std::is_integral_v<T>, "FixedPointQTraits: T must be integral");
> @@ -88,17 +39,36 @@ struct FixedPointQTraits {
>   
>   	static constexpr float toFloat(QuantizedType q)
>   	{
> -		return fixedToFloatingPoint<I, F, float, QuantizedType>(q);
> +		if constexpr (std::is_unsigned_v<T>)
> +			return static_cast<float>(q) / static_cast<float>(1 << F);
> +
> +		/*
> +		 * Recreate the upper bits in case of a negative number by shifting the sign
> +		 * bit from the fixed point to the first bit of the unsigned and then right shifting
> +		 * by the same amount which keeps the sign bit in place.
> +		 * This can be optimized by the compiler quite well.
> +		 */
> +		static_assert(sizeof(int) >= sizeof(T));

I'm wondering how easily this limitation can be lifted, e.g. is

   // fromFloat
   static_cast<T>(static_cast<std::make_signed_t<T>>(std::round(v * (1 << F)))) & BitMask
  

   // toFloat
   static_cast<T>(static_cast<std::make_unsigned_t<T>>(q) << remaining_bits) >> remaining_bits);

sufficient?


> +
> +		int remaining_bits = sizeof(int) * 8 - (I + F);

This could very well be a `constexpr` thing.


> +		int t = static_cast<int>(static_cast<unsigned>(q) << remaining_bits) >> remaining_bits;
> +		return static_cast<float>(t) / static_cast<float>(1 << F);
>   	}
>   
> -	static constexpr float min = fixedToFloatingPoint<I, F, float>(qmin);
> -	static constexpr float max = fixedToFloatingPoint<I, F, float>(qmax);
> +	static constexpr float min = toFloat(qmin);
> +	static constexpr float max = toFloat(qmax);
>   
>   	/* Conversion functions required by Quantized<Traits> */
>   	static constexpr QuantizedType fromFloat(float v)
>   	{
>   		v = std::clamp(v, min, max);
> -		return floatingToFixedPoint<I, F, QuantizedType, float>(v);
> +
> +		/*
> +		 * The intermediate cast to int is needed on arm platforms to properly
> +		 * cast negative values. See
> +		 * https://embeddeduse.com/2013/08/25/casting-a-negative-float-to-an-unsigned-int/
> +		 */
> +		return static_cast<T>(static_cast<int>(std::round(v * (1 << F)))) & BitMask;
>   	}
>   };
>
Kieran Bingham Nov. 20, 2025, 5:50 p.m. UTC | #4
Quoting Barnabás Pőcze (2025-11-20 17:26:42)
> 2025. 11. 14. 1:54 keltezéssel, Kieran Bingham írta:
> > With all users of the floatingToFixedPoint and fixedToFloatingPoint
> > calls converted to use the FixedPoint Quantized types, remove the calls
> > and documentation and move the implementation inline in the
> > FixedPointTraits implementation.
> 
> I'm wondering if we really want to remove them, the functions are somewhat
> more flexible (e.g. allowing `double`). But arguably that is not used after
> these changes.

If we get any users of types bigger than Q<I+F > 32> perhaps we
could consider extending the Q type to be able to support double but I
don't see that being likely

> 
> If yes:
> Reviewed-by: Barnabás Pőcze <barnabas.pocze@ideasonboard.com>

Thanks

Kieran

> 
> > 
> > Signed-off-by: Kieran Bingham <kieran.bingham@ideasonboard.com>
> > ---
> >   src/ipa/libipa/fixedpoint.cpp | 22 ----------
> >   src/ipa/libipa/fixedpoint.h   | 76 +++++++++++------------------------
> >   2 files changed, 23 insertions(+), 75 deletions(-)
> > 
> > diff --git a/src/ipa/libipa/fixedpoint.cpp b/src/ipa/libipa/fixedpoint.cpp
> > index 97cdd9b3f7aa..f2c93b1cf706 100644
> > --- a/src/ipa/libipa/fixedpoint.cpp
> > +++ b/src/ipa/libipa/fixedpoint.cpp
> > @@ -15,28 +15,6 @@ namespace libcamera {
> >   
> >   namespace ipa {
> >   
> > -/**
> > - * \fn R floatingToFixedPoint(T number)
> > - * \brief Convert a floating point number to a fixed-point representation
> > - * \tparam I Bit width of the integer part of the fixed-point
> > - * \tparam F Bit width of the fractional part of the fixed-point
> > - * \tparam R Return type of the fixed-point representation
> > - * \tparam T Input type of the floating point representation
> > - * \param number The floating point number to convert to fixed point
> > - * \return The converted value
> > - */
> > -
> > -/**
> > - * \fn R fixedToFloatingPoint(T number)
> > - * \brief Convert a fixed-point number to a floating point representation
> > - * \tparam I Bit width of the integer part of the fixed-point
> > - * \tparam F Bit width of the fractional part of the fixed-point
> > - * \tparam R Return type of the floating point representation
> > - * \tparam T Input type of the fixed-point representation
> > - * \param number The fixed point number to convert to floating point
> > - * \return The converted value
> > - */
> > -
> >   /**
> >    * \struct libcamera::ipa::FixedPointQTraits
> >    * \brief Traits type implementing fixed-point quantisation conversions
> > diff --git a/src/ipa/libipa/fixedpoint.h b/src/ipa/libipa/fixedpoint.h
> > index 1e984350111c..fe75af16bb54 100644
> > --- a/src/ipa/libipa/fixedpoint.h
> > +++ b/src/ipa/libipa/fixedpoint.h
> > @@ -16,55 +16,6 @@ namespace libcamera {
> >   
> >   namespace ipa {
> >   
> > -#ifndef __DOXYGEN__
> > -template<unsigned int I, unsigned int F, typename R, typename T,
> > -      std::enable_if_t<std::is_integral_v<R> &&
> > -                       std::is_floating_point_v<T>> * = nullptr>
> > -#else
> > -template<unsigned int I, unsigned int F, typename R, typename T>
> > -#endif
> > -constexpr R floatingToFixedPoint(T number)
> > -{
> > -     static_assert(sizeof(int) >= sizeof(R));
> > -     static_assert(I + F <= sizeof(R) * 8);
> > -
> > -     /*
> > -      * The intermediate cast to int is needed on arm platforms to properly
> > -      * cast negative values. See
> > -      * https://embeddeduse.com/2013/08/25/casting-a-negative-float-to-an-unsigned-int/
> > -      */
> > -     R mask = (1 << (F + I)) - 1;
> > -     R frac = static_cast<R>(static_cast<int>(std::round(number * (1 << F)))) & mask;
> > -
> > -     return frac;
> > -}
> > -
> > -#ifndef __DOXYGEN__
> > -template<unsigned int I, unsigned int F, typename R, typename T,
> > -      std::enable_if_t<std::is_floating_point_v<R> &&
> > -                       std::is_integral_v<T>> * = nullptr>
> > -#else
> > -template<unsigned int I, unsigned int F, typename R, typename T>
> > -#endif
> > -constexpr R fixedToFloatingPoint(T number)
> > -{
> > -     static_assert(sizeof(int) >= sizeof(T));
> > -     static_assert(I + F <= sizeof(T) * 8);
> > -
> > -     if constexpr (std::is_unsigned_v<T>)
> > -             return static_cast<R>(number) / static_cast<R>(1 << F);
> > -
> > -     /*
> > -      * Recreate the upper bits in case of a negative number by shifting the sign
> > -      * bit from the fixed point to the first bit of the unsigned and then right shifting
> > -      * by the same amount which keeps the sign bit in place.
> > -      * This can be optimized by the compiler quite well.
> > -      */
> > -     int remaining_bits = sizeof(int) * 8 - (I + F);
> > -     int t = static_cast<int>(static_cast<unsigned>(number) << remaining_bits) >> remaining_bits;
> > -     return static_cast<R>(t) / static_cast<R>(1 << F);
> > -}
> > -
> >   template<unsigned int I, unsigned int F, typename T>
> >   struct FixedPointQTraits {
> >       static_assert(std::is_integral_v<T>, "FixedPointQTraits: T must be integral");
> > @@ -88,17 +39,36 @@ struct FixedPointQTraits {
> >   
> >       static constexpr float toFloat(QuantizedType q)
> >       {
> > -             return fixedToFloatingPoint<I, F, float, QuantizedType>(q);
> > +             if constexpr (std::is_unsigned_v<T>)
> > +                     return static_cast<float>(q) / static_cast<float>(1 << F);
> > +
> > +             /*
> > +              * Recreate the upper bits in case of a negative number by shifting the sign
> > +              * bit from the fixed point to the first bit of the unsigned and then right shifting
> > +              * by the same amount which keeps the sign bit in place.
> > +              * This can be optimized by the compiler quite well.
> > +              */
> > +             static_assert(sizeof(int) >= sizeof(T));
> 
> I'm wondering how easily this limitation can be lifted, e.g. is
> 
>    // fromFloat
>    static_cast<T>(static_cast<std::make_signed_t<T>>(std::round(v * (1 << F)))) & BitMask
>   
> 
>    // toFloat
>    static_cast<T>(static_cast<std::make_unsigned_t<T>>(q) << remaining_bits) >> remaining_bits);
> 
> sufficient?
> 
> 
> > +
> > +             int remaining_bits = sizeof(int) * 8 - (I + F);
> 
> This could very well be a `constexpr` thing.
> 
> 
> > +             int t = static_cast<int>(static_cast<unsigned>(q) << remaining_bits) >> remaining_bits;
> > +             return static_cast<float>(t) / static_cast<float>(1 << F);
> >       }
> >   
> > -     static constexpr float min = fixedToFloatingPoint<I, F, float>(qmin);
> > -     static constexpr float max = fixedToFloatingPoint<I, F, float>(qmax);
> > +     static constexpr float min = toFloat(qmin);
> > +     static constexpr float max = toFloat(qmax);
> >   
> >       /* Conversion functions required by Quantized<Traits> */
> >       static constexpr QuantizedType fromFloat(float v)
> >       {
> >               v = std::clamp(v, min, max);
> > -             return floatingToFixedPoint<I, F, QuantizedType, float>(v);
> > +
> > +             /*
> > +              * The intermediate cast to int is needed on arm platforms to properly
> > +              * cast negative values. See
> > +              * https://embeddeduse.com/2013/08/25/casting-a-negative-float-to-an-unsigned-int/
> > +              */
> > +             return static_cast<T>(static_cast<int>(std::round(v * (1 << F)))) & BitMask;
> >       }
> >   };
> >   
>

Patch
diff mbox series

diff --git a/src/ipa/libipa/fixedpoint.cpp b/src/ipa/libipa/fixedpoint.cpp
index 97cdd9b3f7aa..f2c93b1cf706 100644
--- a/src/ipa/libipa/fixedpoint.cpp
+++ b/src/ipa/libipa/fixedpoint.cpp
@@ -15,28 +15,6 @@  namespace libcamera {
 
 namespace ipa {
 
-/**
- * \fn R floatingToFixedPoint(T number)
- * \brief Convert a floating point number to a fixed-point representation
- * \tparam I Bit width of the integer part of the fixed-point
- * \tparam F Bit width of the fractional part of the fixed-point
- * \tparam R Return type of the fixed-point representation
- * \tparam T Input type of the floating point representation
- * \param number The floating point number to convert to fixed point
- * \return The converted value
- */
-
-/**
- * \fn R fixedToFloatingPoint(T number)
- * \brief Convert a fixed-point number to a floating point representation
- * \tparam I Bit width of the integer part of the fixed-point
- * \tparam F Bit width of the fractional part of the fixed-point
- * \tparam R Return type of the floating point representation
- * \tparam T Input type of the fixed-point representation
- * \param number The fixed point number to convert to floating point
- * \return The converted value
- */
-
 /**
  * \struct libcamera::ipa::FixedPointQTraits
  * \brief Traits type implementing fixed-point quantisation conversions
diff --git a/src/ipa/libipa/fixedpoint.h b/src/ipa/libipa/fixedpoint.h
index 1e984350111c..fe75af16bb54 100644
--- a/src/ipa/libipa/fixedpoint.h
+++ b/src/ipa/libipa/fixedpoint.h
@@ -16,55 +16,6 @@  namespace libcamera {
 
 namespace ipa {
 
-#ifndef __DOXYGEN__
-template<unsigned int I, unsigned int F, typename R, typename T,
-	 std::enable_if_t<std::is_integral_v<R> &&
-			  std::is_floating_point_v<T>> * = nullptr>
-#else
-template<unsigned int I, unsigned int F, typename R, typename T>
-#endif
-constexpr R floatingToFixedPoint(T number)
-{
-	static_assert(sizeof(int) >= sizeof(R));
-	static_assert(I + F <= sizeof(R) * 8);
-
-	/*
-	 * The intermediate cast to int is needed on arm platforms to properly
-	 * cast negative values. See
-	 * https://embeddeduse.com/2013/08/25/casting-a-negative-float-to-an-unsigned-int/
-	 */
-	R mask = (1 << (F + I)) - 1;
-	R frac = static_cast<R>(static_cast<int>(std::round(number * (1 << F)))) & mask;
-
-	return frac;
-}
-
-#ifndef __DOXYGEN__
-template<unsigned int I, unsigned int F, typename R, typename T,
-	 std::enable_if_t<std::is_floating_point_v<R> &&
-			  std::is_integral_v<T>> * = nullptr>
-#else
-template<unsigned int I, unsigned int F, typename R, typename T>
-#endif
-constexpr R fixedToFloatingPoint(T number)
-{
-	static_assert(sizeof(int) >= sizeof(T));
-	static_assert(I + F <= sizeof(T) * 8);
-
-	if constexpr (std::is_unsigned_v<T>)
-		return static_cast<R>(number) / static_cast<R>(1 << F);
-
-	/*
-	 * Recreate the upper bits in case of a negative number by shifting the sign
-	 * bit from the fixed point to the first bit of the unsigned and then right shifting
-	 * by the same amount which keeps the sign bit in place.
-	 * This can be optimized by the compiler quite well.
-	 */
-	int remaining_bits = sizeof(int) * 8 - (I + F);
-	int t = static_cast<int>(static_cast<unsigned>(number) << remaining_bits) >> remaining_bits;
-	return static_cast<R>(t) / static_cast<R>(1 << F);
-}
-
 template<unsigned int I, unsigned int F, typename T>
 struct FixedPointQTraits {
 	static_assert(std::is_integral_v<T>, "FixedPointQTraits: T must be integral");
@@ -88,17 +39,36 @@  struct FixedPointQTraits {
 
 	static constexpr float toFloat(QuantizedType q)
 	{
-		return fixedToFloatingPoint<I, F, float, QuantizedType>(q);
+		if constexpr (std::is_unsigned_v<T>)
+			return static_cast<float>(q) / static_cast<float>(1 << F);
+
+		/*
+		 * Recreate the upper bits in case of a negative number by shifting the sign
+		 * bit from the fixed point to the first bit of the unsigned and then right shifting
+		 * by the same amount which keeps the sign bit in place.
+		 * This can be optimized by the compiler quite well.
+		 */
+		static_assert(sizeof(int) >= sizeof(T));
+
+		int remaining_bits = sizeof(int) * 8 - (I + F);
+		int t = static_cast<int>(static_cast<unsigned>(q) << remaining_bits) >> remaining_bits;
+		return static_cast<float>(t) / static_cast<float>(1 << F);
 	}
 
-	static constexpr float min = fixedToFloatingPoint<I, F, float>(qmin);
-	static constexpr float max = fixedToFloatingPoint<I, F, float>(qmax);
+	static constexpr float min = toFloat(qmin);
+	static constexpr float max = toFloat(qmax);
 
 	/* Conversion functions required by Quantized<Traits> */
 	static constexpr QuantizedType fromFloat(float v)
 	{
 		v = std::clamp(v, min, max);
-		return floatingToFixedPoint<I, F, QuantizedType, float>(v);
+
+		/*
+		 * The intermediate cast to int is needed on arm platforms to properly
+		 * cast negative values. See
+		 * https://embeddeduse.com/2013/08/25/casting-a-negative-float-to-an-unsigned-int/
+		 */
+		return static_cast<T>(static_cast<int>(std::round(v * (1 << F)))) & BitMask;
 	}
 };