From b10941c08db89f52038e19ee8a29f26d7808a448 Mon Sep 17 00:00:00 2001
From: Kay Sievers <kay.sievers@vrfy.org>
Date: Thu, 17 May 2007 20:09:24 +0200
Subject: [PATCH] volume_id: add and export string encoding function

Needed for mount(8) to look up LABEL=* in /dev/disk/by-label/*. Characters
not suited for a filename are escaped, so the encoded LABEL=* string
can be used to find the corresponding symlink.
---
 extras/volume_id/lib/Makefile         |   4 +-
 extras/volume_id/lib/exported_symbols |   1 +
 extras/volume_id/lib/libvolume_id.h   |   1 +
 extras/volume_id/lib/util.c           | 123 ++++++++++++++++++++++++++
 extras/volume_id/lib/util.h           |   3 +
 extras/volume_id/lib/volume_id.c      |  54 +++++++++++
 6 files changed, 184 insertions(+), 2 deletions(-)

diff --git a/extras/volume_id/lib/Makefile b/extras/volume_id/lib/Makefile
index 17992ba02..328758d03 100644
--- a/extras/volume_id/lib/Makefile
+++ b/extras/volume_id/lib/Makefile
@@ -13,7 +13,7 @@ INSTALL_DATA  = ${INSTALL} -m 644
 INSTALL_LIB = ${INSTALL} -m 755
 
 SHLIB_CUR = 0
-SHLIB_REV = 77
+SHLIB_REV = 78
 SHLIB_AGE = 0
 SHLIB = libvolume_id.so.$(SHLIB_CUR).$(SHLIB_REV).$(SHLIB_AGE)
 
@@ -81,7 +81,7 @@ libvolume_id.a: $(HEADERS) $(OBJS)
 	$(E) "  RANLIB  " $@
 	$(Q) $(RANLIB) $@
 
-$(SHLIB): $(HEADERS) $(addprefix .shlib/,$(OBJS))
+$(SHLIB): $(HEADERS) exported_symbols $(addprefix .shlib/,$(OBJS))
 	$(E) "  CC      " $@
 	$(Q) $(CC) -shared $(CFLAGS) -o $@ \
 		-Wl,-soname,libvolume_id.so.$(SHLIB_CUR),--version-script,exported_symbols \
diff --git a/extras/volume_id/lib/exported_symbols b/extras/volume_id/lib/exported_symbols
index 3422152ed..05b6e52bc 100644
--- a/extras/volume_id/lib/exported_symbols
+++ b/extras/volume_id/lib/exported_symbols
@@ -16,6 +16,7 @@
 	volume_id_probe_raid;
 	volume_id_all_probers;
 	volume_id_get_prober_by_type;
+	volume_id_encode_string;
 
 	volume_id_open_node;
 
diff --git a/extras/volume_id/lib/libvolume_id.h b/extras/volume_id/lib/libvolume_id.h
index c0ab29e0f..19aee68d0 100644
--- a/extras/volume_id/lib/libvolume_id.h
+++ b/extras/volume_id/lib/libvolume_id.h
@@ -40,6 +40,7 @@ extern int volume_id_get_uuid_raw(struct volume_id *id, const uint8_t **uuid, si
 extern int volume_id_get_usage(struct volume_id *id, const char **usage);
 extern int volume_id_get_type(struct volume_id *id, const char **type);
 extern int volume_id_get_type_version(struct volume_id *id, const char **type_version);
+extern int volume_id_encode_string(const char *str, char *str_enc, size_t len);
 
 /*
  * Note: everything below will be made private or removed from
diff --git a/extras/volume_id/lib/util.c b/extras/volume_id/lib/util.c
index 54d9fd0d8..eaaececad 100644
--- a/extras/volume_id/lib/util.c
+++ b/extras/volume_id/lib/util.c
@@ -28,6 +28,129 @@
 #include "libvolume_id.h"
 #include "util.h"
 
+/* number of bytes the UTF-8 sequence at str claims to have, judged by its lead byte str[0] only; 0 if str[0] is no lead byte */
+static int utf8_encoded_expected_len(const char *str)
+{
+	unsigned char c = (unsigned char)str[0];
+
+	if (c < 0x80)			/* 0xxxxxxx: single byte, ASCII */
+		return 1;
+	if ((c & 0xe0) == 0xc0)		/* 110xxxxx */
+		return 2;
+	if ((c & 0xf0) == 0xe0)		/* 1110xxxx */
+		return 3;
+	if ((c & 0xf8) == 0xf0)		/* 11110xxx */
+		return 4;
+	if ((c & 0xfc) == 0xf8)		/* 111110xx */
+		return 5;
+	if ((c & 0xfe) == 0xfc)		/* 1111110x */
+		return 6;
+	return 0;			/* 10xxxxxx (0x80-0xbf), 0xfe or 0xff */
+}
+
+/* decode the UTF-8 sequence at str into its numeric value; returns -1 on a bad lead byte or a bad continuation byte */
+static int utf8_encoded_to_unichar(const char *str)
+{
+	int unichar;
+	int len;
+	int i;
+
+	len = utf8_encoded_expected_len(str);
+	switch (len) {
+	case 1:
+		return (int)str[0];	/* single byte below 0x80: the value is the byte itself */
+	case 2:
+		unichar = str[0] & 0x1f;	/* keep only the payload bits of the lead byte */
+		break;
+	case 3:
+		unichar = (int)str[0] & 0x0f;
+		break;
+	case 4:
+		unichar = (int)str[0] & 0x07;
+		break;
+	case 5:
+		unichar = (int)str[0] & 0x03;
+		break;
+	case 6:
+		unichar = (int)str[0] & 0x01;
+		break;
+	default:
+		return -1;	/* len == 0: str[0] is not a lead byte */
+	}
+
+	for (i = 1; i < len; i++) {
+		if (((int)str[i] & 0xc0) != 0x80)	/* must be 10xxxxxx; a NUL terminator fails this test too */
+			return -1;
+		unichar <<= 6;	/* each continuation byte contributes its low 6 bits */
+		unichar |= (int)str[i] & 0x3f;
+	}
+
+	return unichar;
+}
+
+/* number of bytes (1-6) that the value range of unichar maps to when encoded */
+static int utf8_unichar_to_encoded_len(int unichar)
+{
+	if (unichar < 0x80)		/* fits in 7 bits; negative values end up here as well */
+		return 1;
+	if (unichar < 0x800)		/* fits in 11 bits */
+		return 2;
+	if (unichar < 0x10000)		/* fits in 16 bits */
+		return 3;
+	if (unichar < 0x200000)		/* fits in 21 bits */
+		return 4;
+	if (unichar < 0x4000000)	/* fits in 26 bits */
+		return 5;
+	return 6;
+}
+
+/* check if unicode char has a valid numeric range; returns 1 if acceptable, 0 if not */
+static int utf8_unichar_valid_range(int unichar)
+{
+	if (unichar > 0x10ffff)		/* above the highest Unicode code point */
+		return 0;
+	if ((unichar & 0xfffff800) == 0xd800)	/* 0xd800-0xdfff, the UTF-16 surrogate range */
+		return 0;
+	if ((unichar > 0xfdcf) && (unichar < 0xfdf0))	/* 0xfdd0-0xfdef */
+		return 0;
+	if ((unichar & 0xffff) == 0xffff)	/* low 16 bits all set; NOTE(review): U+xxFFFE is not rejected here - confirm this is intended */
+		return 0;
+	return 1;
+}
+
+/* validate one encoded unicode char at str; returns its length in bytes (1 for ASCII) or -1 if it is not valid */
+int volume_id_utf8_encoded_valid_unichar(const char *str)
+{
+	int len;
+	int unichar;
+	int i;
+
+	len = utf8_encoded_expected_len(str);
+	if (len == 0)
+		return -1;
+
+	/* ascii is valid */
+	if (len == 1)
+		return 1;
+
+	/* all len bytes must have the high bit set; a NUL terminator inside the sequence fails here */
+	for (i = 0; i < len; i++)
+		if ((str[i] & 0x80) != 0x80)
+			return -1;
+
+	unichar = utf8_encoded_to_unichar(str);	/* -1 on a bad continuation byte; rejected by the length check below, as len > 1 here */
+
+	/* reject overlong forms: the length implied by the value must equal the encoded length */
+	if (utf8_unichar_to_encoded_len(unichar) != len)
+		return -1;
+
+	/* check if value has valid range */
+	if (!utf8_unichar_valid_range(unichar))
+		return -1;
+
+	return len;
+}
+
 void volume_id_set_unicode16(char *str, size_t len, const uint8_t *buf, enum endian endianess, size_t count)
 {
 	unsigned int i, j;
diff --git a/extras/volume_id/lib/util.h b/extras/volume_id/lib/util.h
index 1206116ce..6ed7b48b5 100644
--- a/extras/volume_id/lib/util.h
+++ b/extras/volume_id/lib/util.h
@@ -23,6 +23,8 @@
 #include <byteswap.h>
 #include <syslog.h>
 
+#define ALLOWED_CHARS				"#+-.:=@_%"
+
 #ifndef PACKED
 #define PACKED				__attribute__((packed))
 #endif
@@ -77,6 +79,7 @@ enum endian {
 	BE = 1
 };
 
+extern int volume_id_utf8_encoded_valid_unichar(const char *str);
 extern void volume_id_set_unicode16(char *str, size_t len, const uint8_t *buf, enum endian endianess, size_t count);
 extern void volume_id_set_usage(struct volume_id *id, enum volume_id_usage usage_id);
 extern void volume_id_set_label_raw(struct volume_id *id, const uint8_t *buf, size_t count);
diff --git a/extras/volume_id/lib/volume_id.c b/extras/volume_id/lib/volume_id.c
index 15e9cda4d..59303d5bf 100644
--- a/extras/volume_id/lib/volume_id.c
+++ b/extras/volume_id/lib/volume_id.c
@@ -276,6 +276,60 @@ int volume_id_get_type_version(struct volume_id *id, const char **type_version)
 	return 1;
 }
 
+static int needs_encoding(const char c)	/* returns 0 if c may be copied verbatim, 1 if it has to be escaped */
+{
+	if ((c >= '0' && c <= '9') ||
+	    (c >= 'A' && c <= 'Z') ||
+	    (c >= 'a' && c <= 'z') ||
+	    strchr(ALLOWED_CHARS, c))	/* NOTE: strchr() also matches the terminating NUL, so c == '\0' yields 0 */
+		return 0;
+	return 1;
+}
+
+/**
+ * volume_id_encode_string:
+ * @str: Input string to be encoded.
+ * @str_enc: Target buffer to store the encoded, NUL-terminated string.
+ * @len: Size of @str_enc in bytes, including the terminating NUL. The
+ * encoded string may be up to four times as long as the input string.
+ *
+ * Copy valid multi-byte UTF-8 sequences unchanged and encode all other
+ * potentially unsafe characters as their hex value prefixed by '\x'.
+ *
+ * Returns: 1 if the entire string was encoded, 0 on bad arguments or no room.
+ **/
+int volume_id_encode_string(const char *str, char *str_enc, size_t len)
+{
+	size_t i, j;
+
+	if (str == NULL || str_enc == NULL || len == 0)
+		return 0;
+
+	for (i = 0, j = 0; str[i] != '\0'; i++) {
+		int seqlen = volume_id_utf8_encoded_valid_unichar(&str[i]);
+		int escape = seqlen <= 1 && (str[i] == '\\' || needs_encoding(str[i]));
+		size_t need = seqlen > 1 ? (size_t)seqlen : (escape ? 4 : 1);
+
+		/* check before writing; always keep one byte free for the NUL */
+		if (need >= len - j)
+			goto err;
+		if (seqlen > 1) {
+			memcpy(&str_enc[j], &str[i], need);
+			i += need - 1;
+		} else if (escape) {
+			snprintf(&str_enc[j], len - j, "\\x%02x", (unsigned char) str[i]);
+		} else {
+			str_enc[j] = str[i];
+		}
+		j += need;
+	}
+	str_enc[j] = '\0';
+	return 1;
+err:
+	str_enc[j] = '\0';	/* j < len holds: leave a valid, truncated string behind */
+	return 0;
+}
+
 /**
  * volume_id_probe_raid:
  * @id: Probing context.
-- 
2.30.2