[libcamera-devel,v2,1/9] android: jpeg: exif: Expand setString to support different encodings
diff mbox series

Message ID 20210121101549.134574-2-paul.elder@ideasonboard.com
State Superseded
Delegated to: Paul Elder
Headers show
Series
  • Fill in android result metadata and EXIF tags
Related show

Commit Message

Paul Elder Jan. 21, 2021, 10:15 a.m. UTC
GPSProcessingMethod and UserComment in EXIF tags can be in UTF-16.
Expand setString to take an encoding when the field type is undefined.
Update callers accordingly.

Signed-off-by: Paul Elder <paul.elder@ideasonboard.com>

---
Changes in v2:
- moved from utils into exif
- support no-encoding
---
 src/android/jpeg/exif.cpp | 107 +++++++++++++++++++++++++++++++-------
 src/android/jpeg/exif.h   |  12 ++++-
 2 files changed, 99 insertions(+), 20 deletions(-)

Comments

Laurent Pinchart Jan. 21, 2021, 7:35 p.m. UTC | #1
Hi Paul,

Thank you for the patch.

On Thu, Jan 21, 2021 at 07:15:41PM +0900, Paul Elder wrote:
> GPSProcessingMethod and UserComment in EXIF tags can be in UTF-16.
> Expand setString to take an encoding when the field type is undefined.
> Update callers accordingly.
> 
> Signed-off-by: Paul Elder <paul.elder@ideasonboard.com>
> 
> ---
> Changes in v2:
> - moved from utils into exif
> - support no-encoding
> ---
>  src/android/jpeg/exif.cpp | 107 +++++++++++++++++++++++++++++++-------
>  src/android/jpeg/exif.h   |  12 ++++-
>  2 files changed, 99 insertions(+), 20 deletions(-)
> 
> diff --git a/src/android/jpeg/exif.cpp b/src/android/jpeg/exif.cpp
> index 33b3fa7f..cff366a4 100644
> --- a/src/android/jpeg/exif.cpp
> +++ b/src/android/jpeg/exif.cpp
> @@ -7,6 +7,9 @@
>  
>  #include "exif.h"
>  
> +#include <map>
> +#include <uchar.h>
> +
>  #include "libcamera/internal/log.h"
>  #include "libcamera/internal/utils.h"
>  
> @@ -64,7 +67,7 @@ Exif::Exif()
>  	exif_data_set_byte_order(data_, order_);
>  
>  	setString(EXIF_IFD_EXIF, EXIF_TAG_EXIF_VERSION,
> -		  EXIF_FORMAT_UNDEFINED, "0231");
> +		  EXIF_FORMAT_UNDEFINED, NoEncoding, "0231");
>  
>  	/* Create the mandatory EXIF fields with default data. */
>  	exif_data_fix(data_);
> @@ -178,10 +181,18 @@ void Exif::setRational(ExifIfd ifd, ExifTag tag, ExifRational item)
>  	exif_entry_unref(entry);
>  }
>  
> -void Exif::setString(ExifIfd ifd, ExifTag tag, ExifFormat format, const std::string &item)
> +static const std::map<Exif::StringEncoding, std::vector<uint8_t>> StringEncodingCodes = {

static const std::map<Exif::StringEncoding, std::array<uint8_t, 8>> StringEncodingCodes

would be more efficient (if it doesn't cause too many issues in the code
below).

And s/StringEncodingCodes/stringEncodingCodes/

> +	{ Exif::ASCII,     { 0x41, 0x53, 0x43, 0x49, 0x49, 0x00, 0x00, 0x00 } },
> +	{ Exif::JIS,       { 0x4a, 0x49, 0x53, 0x00, 0x00, 0x00, 0x00, 0x00 } },
> +	{ Exif::Unicode,   { 0x55, 0x4e, 0x49, 0x43, 0x4f, 0x44, 0x45, 0x00 } },
> +	{ Exif::Undefined, { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },

Should we drop JIS and Undefined ? It would be different if we were
parsing EXIF data, for as an EXIF generator, I doubt we'll ever generate
a string in any of those encodings.

> +};
> +
> +void Exif::setString(ExifIfd ifd, ExifTag tag, ExifFormat format,
> +		     StringEncoding enc, const std::string &item)

s/enc/encoding/ ?

>  {
>  	std::string ascii;
> -	size_t length;
> +	size_t length = 0;
>  	const char *str;
>  
>  	if (format == EXIF_FORMAT_ASCII) {
> @@ -190,14 +201,44 @@ void Exif::setString(ExifIfd ifd, ExifTag tag, ExifFormat format, const std::str
>  
>  		/* Pad 1 extra byte to null-terminate the ASCII string. */
>  		length = ascii.length() + 1;
> -	} else {
> -		str = item.c_str();
> -
> -		/*
> -		 * Strings stored in different formats (EXIF_FORMAT_UNDEFINED)
> -		 * are not null-terminated.
> -		 */
> -		length = item.length();
> +	} else if (format == EXIF_FORMAT_UNDEFINED) {

Any reason to replace else with else if ? If you keep a plain else, you
won't have to initialize length to 0.

> +		std::vector<uint8_t> buf;
> +		const std::vector<uint8_t> &constbuf = buf;

I don't think you need constbuf.

You can move the encoding lookup out of the switch:

		auto encodingString = stringEncodingCodes.find(enc);
		if (encodingString != stringEncodingCodes.end())
			buf = {
				encodingString->second.begin(),
				encodingString->second.end()
			};

(This handles the conversion from std::array to std::vector)

> +		std::u16string u16str;
> +
> +		switch (enc) {
> +		case Unicode:
> +			buf = StringEncodingCodes.find(enc)->second;
> +			u16str = utf8ToUtf16(item);
> +
> +			/* Little-endian */

Should this depend on the endianness of the EXIF ?

> +			buf.resize(8 + u16str.size() * 2);
> +			for (size_t i = 0; i < u16str.size(); i++) {

			for (char16_t c : u16str) {

> +				buf[8 + 2 * i] = (u16str[i] & 0xff);
> +				buf[8 + 2 * i + 1] = ((u16str[i] >> 8) & 0xff);
> +			}
> +
> +			str = reinterpret_cast<const char *>(constbuf.data());
> +			length = buf.size();
> +
> +			break;
> +		/* \todo Support JIS */
> +		case JIS:
> +		case ASCII:
> +		case Undefined:
> +			buf = StringEncodingCodes.find(enc)->second;
> +			[[fallthrough]];

		case NoEncoding:

and you should be able to drop the default case.

> +		default:
> +			buf.insert(buf.end(), item.begin(), item.end());
> +			str = reinterpret_cast<const char *>(constbuf.data());
> +
> +			/*
> +			 * Strings stored in different formats (EXIF_FORMAT_UNDEFINED)
> +			 * are not null-terminated.
> +			 */
> +			length = buf.size();
> +			break;
> +		}

Once you exit this scope, buf will be destroyed, and str becomes an
invalid pointer. You need to declare buf in the top level scope.

>  	}
>  
>  	ExifEntry *entry = createEntry(ifd, tag, format, length, length);
> @@ -210,12 +251,12 @@ void Exif::setString(ExifIfd ifd, ExifTag tag, ExifFormat format, const std::str
>  
>  void Exif::setMake(const std::string &make)
>  {
> -	setString(EXIF_IFD_0, EXIF_TAG_MAKE, EXIF_FORMAT_ASCII, make);
> +	setString(EXIF_IFD_0, EXIF_TAG_MAKE, EXIF_FORMAT_ASCII, NoEncoding, make);
>  }
>  
>  void Exif::setModel(const std::string &model)
>  {
> -	setString(EXIF_IFD_0, EXIF_TAG_MODEL, EXIF_FORMAT_ASCII, model);
> +	setString(EXIF_IFD_0, EXIF_TAG_MODEL, EXIF_FORMAT_ASCII, NoEncoding, model);
>  }
>  
>  void Exif::setSize(const Size &size)
> @@ -233,9 +274,9 @@ void Exif::setTimestamp(time_t timestamp)
>  	strftime(str, sizeof(str), "%Y:%m:%d %H:%M:%S", &tm);
>  	std::string ts(str);
>  
> -	setString(EXIF_IFD_0, EXIF_TAG_DATE_TIME, EXIF_FORMAT_ASCII, ts);
> -	setString(EXIF_IFD_EXIF, EXIF_TAG_DATE_TIME_ORIGINAL, EXIF_FORMAT_ASCII, ts);
> -	setString(EXIF_IFD_EXIF, EXIF_TAG_DATE_TIME_DIGITIZED, EXIF_FORMAT_ASCII, ts);
> +	setString(EXIF_IFD_0, EXIF_TAG_DATE_TIME, EXIF_FORMAT_ASCII, NoEncoding, ts);
> +	setString(EXIF_IFD_EXIF, EXIF_TAG_DATE_TIME_ORIGINAL, EXIF_FORMAT_ASCII, NoEncoding, ts);
> +	setString(EXIF_IFD_EXIF, EXIF_TAG_DATE_TIME_DIGITIZED, EXIF_FORMAT_ASCII, NoEncoding, ts);
>  
>  	/* Query and set timezone information if available. */
>  	int r = strftime(str, sizeof(str), "%z", &tm);
> @@ -244,13 +285,13 @@ void Exif::setTimestamp(time_t timestamp)
>  		tz.insert(3, 1, ':');
>  		setString(EXIF_IFD_EXIF,
>  			  static_cast<ExifTag>(_ExifTag::OFFSET_TIME),
> -			  EXIF_FORMAT_ASCII, tz);
> +			  EXIF_FORMAT_ASCII, NoEncoding, tz);
>  		setString(EXIF_IFD_EXIF,
>  			  static_cast<ExifTag>(_ExifTag::OFFSET_TIME_ORIGINAL),
> -			  EXIF_FORMAT_ASCII, tz);
> +			  EXIF_FORMAT_ASCII, NoEncoding, tz);
>  		setString(EXIF_IFD_EXIF,
>  			  static_cast<ExifTag>(_ExifTag::OFFSET_TIME_DIGITIZED),
> -			  EXIF_FORMAT_ASCII, tz);
> +			  EXIF_FORMAT_ASCII, NoEncoding, tz);
>  	}
>  }
>  
> @@ -290,6 +331,34 @@ void Exif::setThumbnail(Span<const unsigned char> thumbnail,
>  	setShort(EXIF_IFD_0, EXIF_TAG_COMPRESSION, compression);
>  }
>  
> +/**
> + * \brief Convert UTF-8 string to UTF-16 string
> + * \param[in] str String to convert
> + *
> + * \return \a str in UTF-16
> + */
> +std::u16string Exif::utf8ToUtf16(const std::string &str)
> +{
> +	std::mbstate_t state{};

This should be mbstate_t as you're including uchar.h.

> +	char16_t c16;
> +	const char *ptr = str.data();
> +	const char *end = ptr + str.size();
> +
> +	std::u16string ret;
> +	while (size_t rc = mbrtoc16(&c16, ptr, end - ptr + 1, &state)) {
> +		if (rc == static_cast<size_t>(-2) ||
> +		    rc == static_cast<size_t>(-1))
> +			break;
> +
> +		ret.push_back(c16);
> +
> +		if (rc > 0)
> +			ptr += rc;
> +	}
> +
> +	return ret;
> +}
> +
>  [[nodiscard]] int Exif::generate()
>  {
>  	if (exifData_) {
> diff --git a/src/android/jpeg/exif.h b/src/android/jpeg/exif.h
> index 5cab4559..db98ba63 100644
> --- a/src/android/jpeg/exif.h
> +++ b/src/android/jpeg/exif.h
> @@ -26,6 +26,14 @@ public:
>  		JPEG = 6,
>  	};
>  
> +	enum StringEncoding {
> +		NoEncoding = 0,
> +		ASCII = 1,
> +		JIS = 2,
> +		Unicode = 3,
> +		Undefined = 4,
> +	};
> +
>  	void setMake(const std::string &make);
>  	void setModel(const std::string &model);
>  
> @@ -46,9 +54,11 @@ private:
>  	void setShort(ExifIfd ifd, ExifTag tag, uint16_t item);
>  	void setLong(ExifIfd ifd, ExifTag tag, uint32_t item);
>  	void setString(ExifIfd ifd, ExifTag tag, ExifFormat format,
> -		       const std::string &item);
> +		       StringEncoding enc, const std::string &item);

Would it make sense to move the encoding parameter last, to default it
to NoEncoding ? That way most of the callers (and in particular the ones
that pass EXIF_FORMAT_ASCII for format) wouldn't need to set enc to
NoEncoding explicitly.

>  	void setRational(ExifIfd ifd, ExifTag tag, ExifRational item);
>  
> +	std::u16string utf8ToUtf16(const std::string &str);
> +
>  	bool valid_;
>  
>  	ExifData *data_;

Patch
diff mbox series

diff --git a/src/android/jpeg/exif.cpp b/src/android/jpeg/exif.cpp
index 33b3fa7f..cff366a4 100644
--- a/src/android/jpeg/exif.cpp
+++ b/src/android/jpeg/exif.cpp
@@ -7,6 +7,9 @@ 
 
 #include "exif.h"
 
+#include <map>
+#include <uchar.h>
+
 #include "libcamera/internal/log.h"
 #include "libcamera/internal/utils.h"
 
@@ -64,7 +67,7 @@  Exif::Exif()
 	exif_data_set_byte_order(data_, order_);
 
 	setString(EXIF_IFD_EXIF, EXIF_TAG_EXIF_VERSION,
-		  EXIF_FORMAT_UNDEFINED, "0231");
+		  EXIF_FORMAT_UNDEFINED, NoEncoding, "0231");
 
 	/* Create the mandatory EXIF fields with default data. */
 	exif_data_fix(data_);
@@ -178,10 +181,18 @@  void Exif::setRational(ExifIfd ifd, ExifTag tag, ExifRational item)
 	exif_entry_unref(entry);
 }
 
-void Exif::setString(ExifIfd ifd, ExifTag tag, ExifFormat format, const std::string &item)
+static const std::map<Exif::StringEncoding, std::vector<uint8_t>> StringEncodingCodes = {
+	{ Exif::ASCII,     { 0x41, 0x53, 0x43, 0x49, 0x49, 0x00, 0x00, 0x00 } },
+	{ Exif::JIS,       { 0x4a, 0x49, 0x53, 0x00, 0x00, 0x00, 0x00, 0x00 } },
+	{ Exif::Unicode,   { 0x55, 0x4e, 0x49, 0x43, 0x4f, 0x44, 0x45, 0x00 } },
+	{ Exif::Undefined, { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
+};
+
+void Exif::setString(ExifIfd ifd, ExifTag tag, ExifFormat format,
+		     StringEncoding enc, const std::string &item)
 {
 	std::string ascii;
-	size_t length;
+	size_t length = 0;
 	const char *str;
 
 	if (format == EXIF_FORMAT_ASCII) {
@@ -190,14 +201,44 @@  void Exif::setString(ExifIfd ifd, ExifTag tag, ExifFormat format, const std::str
 
 		/* Pad 1 extra byte to null-terminate the ASCII string. */
 		length = ascii.length() + 1;
-	} else {
-		str = item.c_str();
-
-		/*
-		 * Strings stored in different formats (EXIF_FORMAT_UNDEFINED)
-		 * are not null-terminated.
-		 */
-		length = item.length();
+	} else if (format == EXIF_FORMAT_UNDEFINED) {
+		std::vector<uint8_t> buf;
+		const std::vector<uint8_t> &constbuf = buf;
+		std::u16string u16str;
+
+		switch (enc) {
+		case Unicode:
+			buf = StringEncodingCodes.find(enc)->second;
+			u16str = utf8ToUtf16(item);
+
+			/* Little-endian */
+			buf.resize(8 + u16str.size() * 2);
+			for (size_t i = 0; i < u16str.size(); i++) {
+				buf[8 + 2 * i] = (u16str[i] & 0xff);
+				buf[8 + 2 * i + 1] = ((u16str[i] >> 8) & 0xff);
+			}
+
+			str = reinterpret_cast<const char *>(constbuf.data());
+			length = buf.size();
+
+			break;
+		/* \todo Support JIS */
+		case JIS:
+		case ASCII:
+		case Undefined:
+			buf = StringEncodingCodes.find(enc)->second;
+			[[fallthrough]];
+		default:
+			buf.insert(buf.end(), item.begin(), item.end());
+			str = reinterpret_cast<const char *>(constbuf.data());
+
+			/*
+			 * Strings stored in different formats (EXIF_FORMAT_UNDEFINED)
+			 * are not null-terminated.
+			 */
+			length = buf.size();
+			break;
+		}
 	}
 
 	ExifEntry *entry = createEntry(ifd, tag, format, length, length);
@@ -210,12 +251,12 @@  void Exif::setString(ExifIfd ifd, ExifTag tag, ExifFormat format, const std::str
 
 void Exif::setMake(const std::string &make)
 {
-	setString(EXIF_IFD_0, EXIF_TAG_MAKE, EXIF_FORMAT_ASCII, make);
+	setString(EXIF_IFD_0, EXIF_TAG_MAKE, EXIF_FORMAT_ASCII, NoEncoding, make);
 }
 
 void Exif::setModel(const std::string &model)
 {
-	setString(EXIF_IFD_0, EXIF_TAG_MODEL, EXIF_FORMAT_ASCII, model);
+	setString(EXIF_IFD_0, EXIF_TAG_MODEL, EXIF_FORMAT_ASCII, NoEncoding, model);
 }
 
 void Exif::setSize(const Size &size)
@@ -233,9 +274,9 @@  void Exif::setTimestamp(time_t timestamp)
 	strftime(str, sizeof(str), "%Y:%m:%d %H:%M:%S", &tm);
 	std::string ts(str);
 
-	setString(EXIF_IFD_0, EXIF_TAG_DATE_TIME, EXIF_FORMAT_ASCII, ts);
-	setString(EXIF_IFD_EXIF, EXIF_TAG_DATE_TIME_ORIGINAL, EXIF_FORMAT_ASCII, ts);
-	setString(EXIF_IFD_EXIF, EXIF_TAG_DATE_TIME_DIGITIZED, EXIF_FORMAT_ASCII, ts);
+	setString(EXIF_IFD_0, EXIF_TAG_DATE_TIME, EXIF_FORMAT_ASCII, NoEncoding, ts);
+	setString(EXIF_IFD_EXIF, EXIF_TAG_DATE_TIME_ORIGINAL, EXIF_FORMAT_ASCII, NoEncoding, ts);
+	setString(EXIF_IFD_EXIF, EXIF_TAG_DATE_TIME_DIGITIZED, EXIF_FORMAT_ASCII, NoEncoding, ts);
 
 	/* Query and set timezone information if available. */
 	int r = strftime(str, sizeof(str), "%z", &tm);
@@ -244,13 +285,13 @@  void Exif::setTimestamp(time_t timestamp)
 		tz.insert(3, 1, ':');
 		setString(EXIF_IFD_EXIF,
 			  static_cast<ExifTag>(_ExifTag::OFFSET_TIME),
-			  EXIF_FORMAT_ASCII, tz);
+			  EXIF_FORMAT_ASCII, NoEncoding, tz);
 		setString(EXIF_IFD_EXIF,
 			  static_cast<ExifTag>(_ExifTag::OFFSET_TIME_ORIGINAL),
-			  EXIF_FORMAT_ASCII, tz);
+			  EXIF_FORMAT_ASCII, NoEncoding, tz);
 		setString(EXIF_IFD_EXIF,
 			  static_cast<ExifTag>(_ExifTag::OFFSET_TIME_DIGITIZED),
-			  EXIF_FORMAT_ASCII, tz);
+			  EXIF_FORMAT_ASCII, NoEncoding, tz);
 	}
 }
 
@@ -290,6 +331,34 @@  void Exif::setThumbnail(Span<const unsigned char> thumbnail,
 	setShort(EXIF_IFD_0, EXIF_TAG_COMPRESSION, compression);
 }
 
+/**
+ * \brief Convert UTF-8 string to UTF-16 string
+ * \param[in] str String to convert
+ *
+ * \return \a str in UTF-16
+ */
+std::u16string Exif::utf8ToUtf16(const std::string &str)
+{
+	std::mbstate_t state{};
+	char16_t c16;
+	const char *ptr = str.data();
+	const char *end = ptr + str.size();
+
+	std::u16string ret;
+	while (size_t rc = mbrtoc16(&c16, ptr, end - ptr + 1, &state)) {
+		if (rc == static_cast<size_t>(-2) ||
+		    rc == static_cast<size_t>(-1))
+			break;
+
+		ret.push_back(c16);
+
+		if (rc > 0)
+			ptr += rc;
+	}
+
+	return ret;
+}
+
 [[nodiscard]] int Exif::generate()
 {
 	if (exifData_) {
diff --git a/src/android/jpeg/exif.h b/src/android/jpeg/exif.h
index 5cab4559..db98ba63 100644
--- a/src/android/jpeg/exif.h
+++ b/src/android/jpeg/exif.h
@@ -26,6 +26,14 @@  public:
 		JPEG = 6,
 	};
 
+	enum StringEncoding {
+		NoEncoding = 0,
+		ASCII = 1,
+		JIS = 2,
+		Unicode = 3,
+		Undefined = 4,
+	};
+
 	void setMake(const std::string &make);
 	void setModel(const std::string &model);
 
@@ -46,9 +54,11 @@  private:
 	void setShort(ExifIfd ifd, ExifTag tag, uint16_t item);
 	void setLong(ExifIfd ifd, ExifTag tag, uint32_t item);
 	void setString(ExifIfd ifd, ExifTag tag, ExifFormat format,
-		       const std::string &item);
+		       StringEncoding enc, const std::string &item);
 	void setRational(ExifIfd ifd, ExifTag tag, ExifRational item);
 
+	std::u16string utf8ToUtf16(const std::string &str);
+
 	bool valid_;
 
 	ExifData *data_;