Use new international mailbox names implementation

A new custom implementation of international mailbox name conversion (between UTF-8 and IMAP modified UTF-7) has replaced the previous iconv-based solution.
2024-12-21 23:28:49 -05:00 · 2012-07-22 15:41:28 +03:00 · 2012-07-22 15:41:28 +03:00 · 066ca99e36
commit 066ca99e36
parent 362a123cd4
1 changed files with 236 additions and 155 deletions
--- a/src/namespace.c
+++ b/src/namespace.c
@ -1,6 +1,5 @@
 #include <stdio.h>
 #include <string.h>
-#include <iconv.h>
 #include <errno.h>

 #include "imapfilter.h"
@ -11,6 +10,10 @@ buffer nbuf;			/* Namespace buffer. */
 buffer cbuf;			/* Conversion buffer. */


+static const char base64[] = 
+	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";
+
+
 const char *apply_conversion(const char *mbox);
 const char *reverse_conversion(const char *mbox);

@ -95,113 +98,129 @@ reverse_namespace(const char *mbox, char *prefix, char delim)
 const char *
 apply_conversion(const char *mbox)
 {
-	iconv_t cd;
-	char *inbuf, *outbuf;
-	size_t inlen, outlen;
-	char *c, *shift;
-	unsigned char *r, *w;
+	unsigned char *c, *out;
+	unsigned char ucp[4], ucplast, ucptemp;
+	int padding, shift;

 	buffer_check(&nbuf, strlen(mbox)); 
 	buffer_reset(&nbuf);
 	xstrncpy(nbuf.data, mbox, nbuf.size);
 	nbuf.len = strlen(nbuf.data);
-	buffer_check(&cbuf, nbuf.len * 5);
+	buffer_check(&cbuf, nbuf.len * 4);
 	buffer_reset(&cbuf);

-	r = (unsigned char *)nbuf.data;
-	w = (unsigned char *)cbuf.data;
-	inbuf = outbuf = NULL;
-	inlen = outlen = 0;
-	while (*r != '\0') {
-		/* Skip non-printable ASCII characters. */
-		if (*r < 0x20 || *r == 0x7F) {
-			r++;
+	c = (unsigned char *)nbuf.data;
+	out = (unsigned char *)cbuf.data;
+
+	memset((void *)ucp, 0, sizeof(ucp));
+	ucplast = ucptemp = 0;
+	padding = shift = 0;
+
+	while (*c != '\0' || shift > 0) {
+		if (shift > 0 && *c <= 0x7F) {
+			/* Last character so do Base64 padding. */
+			if (padding == 2) {
+				*out++ = base64[ucplast << 2 & 0x3C];
+			} else if (padding == 4) {
+				*out++ = base64[ucplast << 4 & 0x30];
+			}
+			*out++ = '-';
+			padding = 0;
+			shift = 0;
 			continue;
 		}
+
 		/* Escape shift character for modified UTF-7. */
-		if (*r == '&') {
-			*w++ = '&';
-			*w++ = '-';
-			r++;
+		if (*c == '&') {
+			*out++ = '&';
+			*out++ = '-';
+			c++;
+			continue;
+
+		/* Copy all ASCII printable characters. */
+		} else if ((*c >= 0x20 && *c <= 0x7e)) {
+			*out++ = *c;
+			c++;
 			continue;
 		}
-		/* Copy ASCII printable characters. */
-		if (*r >= 0x20 && *r <= 0x7E) {
-			*w++ = *r++;
-			continue;
-		}
-		/* UTF-8 sequence will follow. */
-		if (inbuf == NULL) {
-			inbuf = (char *)r;
-			inlen = 0;
-		}
-		if ((*r & 0xE0) == 0xC0) {	/* Two byte UTF-8. */
-			inlen += 2;
-			r += 2;
-		} else if ((*r & 0xF0) == 0xE0) {	/* Three byte UTF-8. */
-			inlen += 3;
-			r += 3;
-		} else if ((*r & 0xF8) == 0xF0) {	/* Four byte UTF-8. */
-			inlen += 4;
-			r += 4;
-		}
-		/* UTF-8 sequence has ended, convert it to UTF-7. */
-		if (inbuf != NULL && (*r <= 0x7F || *r == '\0')) {
-			outbuf = (char *)w;
-			outlen = cbuf.size - (outbuf - cbuf.data);

-			cd = iconv_open("UTF-7", "");
-			if (cd == (iconv_t)-1) {
-				error("converting mailbox name; %s\n",
-				    strerror(errno));
-				return mbox;
-			}
-			while (inlen > 0) {
-				if (iconv(cd, &inbuf, &inlen, &outbuf, &outlen)
-				    == -1) {
-					if (errno == E2BIG) {
-						buffer_check(&cbuf, cbuf.size *
-						    2);
-						break;
-					} else {
-						error("converting mailbox name;"
-						    "%s\n", strerror(errno));
-						return mbox;
-					}
-				} else {
-					iconv(cd, NULL, NULL, &outbuf, &outlen);
-				}
-			}
-			iconv_close(cd);
+		/* Non-ASCII UTF-8 characters follow. */
+		if (shift == 0)
+			*out++ = '&';
+		/* Convert UTF-8 characters to Unicode code point. */
+		if ((*c & 0xE0) == 0xC0) {
+			shift = 2;
+			ucp[0] = 0x07 & *c >> 2;
+			ucp[1] = (*c << 6 & 0xC0) | (*(c + 1) & 0x3F);
+			c += 2;
+		} else if ((*c & 0xF0) == 0xE0) {
+			shift = 3;
+			ucp[0] = (*c << 4 & 0xF0) | (*(c + 1) >> 2 & 0x0F);
+			ucp[1] = (*(c + 1) << 6 & 0xC0) | (*(c + 2) & 0x3F);
+			c += 3;
+		} else if ((*c & 0xF8) == 0xF0) {
+			shift = 4;
+			ucptemp = ((*c << 2 & 0x1C) | (*(c + 1) >> 4 & 0x03)) -
+			    0x01;
+			ucp[0] = (ucptemp >> 2 & 0x03) | 0xD8;
+			ucp[1] = (ucptemp << 6 & 0xC0) |
+			    (*(c + 1) << 2 & 0x3C) | (*(c + 2) >> 4 & 0x03);
+			ucp[2] = (*(c + 2) >> 2 & 0x03) | 0xDC;
+			ucp[3] = (*(c + 2) << 6 & 0xC0) | (*(c + 3) & 0x3F);
+			c += 4;
+		}

-			w = (unsigned char *)outbuf;
-			inbuf = outbuf = NULL;
-			inlen = outlen = 0;
+		/* Convert Unicode characters to UTF-7. */
+		if (padding == 0) {
+			*out++ = base64[ucp[0] >> 2 & 0x3F];
+			*out++ = base64[(ucp[0] << 4 & 0x30) |
+			    (ucp[1] >> 4 & 0x0F)];
+			if (shift == 4) {
+				ucplast = ucp[3];
+				*out++ = base64[(ucp[1] << 2 & 0x3C) |
+				    (ucp[2] >> 6 & 0x03)];
+				*out++ = base64[ucp[2] & 0x3F];
+				*out++ = base64[ucp[3] >> 2 & 0x3F];
+				padding = 4;
+			} else {
+				ucplast = ucp[1];
+				padding = 2;
+			}
+		} else if (padding == 2) {
+			*out++ = base64[(ucplast << 2 & 0x3C) |
+			    (ucp[0] >> 6 & 0x03)];
+			*out++ = base64[ucp[0] & 0x3F];
+			*out++ = base64[ucp[1] >> 2 & 0x3F];
+			if (shift == 4) {
+				*out++ = base64[(ucp[1] << 4 & 0x30) |
+				    (ucp[2] >> 4 & 0x0F)];
+				*out++ = base64[(ucp[2] << 2 & 0x3C) |
+				    (ucp[3] >> 6 & 0x03)];
+				*out++ = base64[ucp[3] & 0x3F];
+				padding = 0;
+			} else {
+				ucplast = ucp[1];
+				padding = 4;
+			}
+		} else if (padding == 4) {
+			*out++ = base64[(ucplast << 4 & 0x30) |
+			    (ucp[0] >> 4 & 0x0F)];
+			*out++ = base64[(ucp[0] << 2 & 0x3C) |
+			    (ucp[1] >> 6 & 0x03)];
+			if (shift == 4) {
+				ucplast = ucp[3];
+				*out++ = base64[ucp[1] & 0x3F];
+				*out++ = base64[ucp[2] >> 2 & 0x3F];
+				*out++ = base64[(ucp[2] << 4 & 0x30) |
+				    (ucp[3] >> 4 & 0x0F)];
+				padding = 2;
+			} else {
+				*out++ = base64[ucp[1] & 0x3F];
+				padding = 0;
+			}
 		}
 	}
-
-	if (*w != '\0')
-		*w = '\0';
-
-	/* Convert UTF-7 sequences to IMAP modified UTF-7. */
-	for (c = cbuf.data, shift = NULL; *c != '\0'; c++)
-		switch (*c) {
-		case '+':
-			*c = '&';
-			shift = c;
-			break;
-		case '-':
-			shift = NULL;
-			break;
-		case '/':
-			if (shift != NULL)
-				*c = ',';
-			break;
-		}
-	if (shift != NULL) {
-		*w++ = '-';
-		*w = '\0';
-	}
+	*out = '\0';

 	debug("conversion: '%s' -> '%s'\n", nbuf.data, cbuf.data);

@ -215,82 +234,144 @@ apply_conversion(const char *mbox)
 const char *
 reverse_conversion(const char *mbox)
 {
-	iconv_t cd;
-	char *inbuf, *outbuf;
-	size_t inlen, outlen;
-	char *c, *shift;
+	unsigned char *c, *out;
+	unsigned char ucp[4], ucptemp;
+	unsigned char b64[6], b64last;
+	int padding;
 	
 	buffer_check(&cbuf, strlen(mbox));
 	buffer_reset(&cbuf);
 	xstrncpy(cbuf.data, mbox, cbuf.size);
+	cbuf.len = strlen(cbuf.data);
+	buffer_check(&nbuf, cbuf.len);
+	buffer_reset(&nbuf);

-	/* Convert IMAP modified UTF-7 sequences to UTF-7. */
-	for (c = cbuf.data, shift = NULL; *c != '\0'; c++)
-		switch (*c) {
-		case '&':
-			*c = '+';
-			shift = c;
-			break;
-		case '-':
-			shift = NULL;
-			break;
-		case ',':
-			if (shift != NULL)
-				*c = '/';
-			break;
-		}
+	c = (unsigned char *)cbuf.data;
+	out = (unsigned char *)nbuf.data;

-	do {
-		inbuf = cbuf.data;
-		inlen = strlen(cbuf.data);
+	memset((void *)ucp, 0, sizeof(ucp));
+	memset((void *)b64, 0, sizeof(b64));
+	ucptemp = b64last = 0;
+	padding = 0;

-		buffer_check(&nbuf, inlen);
-		buffer_reset(&nbuf);
-
-		outbuf = nbuf.data;
-		outlen = nbuf.size;
-
-		cd = iconv_open("", "UTF-7");
-		if (cd == (iconv_t)-1) {
-			error("converting mailbox name; %s\n", strerror(errno));
-			return mbox;
-		}
-		while (inlen > 0) {
-			if (iconv(cd, &inbuf, &inlen, &outbuf, &outlen) == -1) {
-				if (errno == E2BIG) {
-					buffer_check(&nbuf, nbuf.size * 2);
-					break;
-				} else {
-					error("converting mailbox name; %s\n",
-					    strerror(errno));
-					return mbox;
-				}
+	while (*c != '\0') {
+		/* Copy all ASCII printable characters. */
+		if (*c >= 0x20 && *c <= 0x7e) {
+			if (*c != '&') {
+				*out++ = *c++;
+				continue;
 			} else {
-				iconv(cd, NULL, NULL, &outbuf, &outlen);
+				c++;
 			}
 		}
-		iconv_close(cd);
-	} while (inlen > 0);

-	*outbuf = '\0';
-	for (c = nbuf.data; (c = strchr(c,'+')) != NULL; *c = '&');
-
-	/* Convert UTF-7 sequences to IMAP modified UTF-7. */
-	for (c = cbuf.data, shift = NULL; *c != '\0'; c++)
-		switch (*c) {
-		case '+':
-			*c = '&';
-			shift = c;
-			break;
-		case '-':
-			shift = NULL;
-			break;
-		case '/':
-			if (shift != NULL)
-				*c = ',';
-			break;
+		/* Write shift character for modified UTF-7. */
+		if (*c == '-') {
+			*out++ = '&';
+			c++;
+			continue;
 		}

+		/* UTF-7 characters follow. */
+		padding = 0;
+		do {
+			/* Read Base64 characters. */
+			b64[0] = strchr(base64, *c) - base64;
+			b64[1] = strchr(base64, *(c + 1)) - base64;
+			if (padding == 0 || padding == 2) {
+				b64[2] = strchr(base64, *(c + 2)) - base64;
+				c += 3;
+			} else {
+				c += 2;
+			}
+			/* Convert from Base64 to Unicode code point. */
+			if (padding == 0) {
+				ucp[0] = (b64[0] << 2 & 0xFC) |
+				    (b64[1] >> 4 & 0x03);
+				ucp[1] = (b64[1] << 4 & 0xF0) |
+				    (b64[2] >> 2 & 0x0F);
+				b64last = b64[2];
+				padding = 2;
+			} else if (padding == 2) {
+				ucp[0] = (b64last << 6 & 0xC0) |
+				    (b64[0] & 0x3F);
+				ucp[1] = (b64[1] << 2 & 0xFC) |
+				    (b64[2] >> 4 & 0x03);
+				b64last = b64[2];
+				padding = 4;
+			} else if (padding == 4) {
+				ucp[0] = (b64last << 4 & 0xF0) |
+				    (b64[0] >> 2 & 0x0F);
+				ucp[1] = (b64[0] << 6 & 0xC0) |
+				    (b64[1] & 0x3F);
+				padding = 0;
+			}
+
+			/* Convert from Unicode to UTF-8. */
+			if (ucp[0] >= 0x00 && ucp[0] <= 0x07) {
+				*out++ = 0xC0 | (ucp[0] << 2 & 0x1C) |
+				    (ucp[1] >> 6 & 0x03);
+				*out++ = 0x80 | (ucp[1] & 0x3F);
+			} else if ((ucp[0] >= 0x08 && ucp[0] <= 0xD7) ||
+			    (ucp[0] >= 0xE0 && ucp[0] <= 0xFF)) {
+				*out++ = 0xE0 | (ucp[0] >> 4 & 0x0F);
+				*out++ = 0x80 | (ucp[0] << 2 & 0x1C) |
+				    (ucp[1] >> 6 & 0x03);
+				*out++ = 0x80 | (ucp[1] & 0x3F);
+			} else if (ucp[0] >= 0xD8 && ucp[0] <= 0xDF) {
+				b64[3] = strchr(base64, *c) - base64;
+				b64[4] = strchr(base64, *(c + 1)) - base64;
+				if (padding == 0 || padding == 2) {
+					b64[5] = strchr(base64, *(c + 2)) -
+					    base64;
+					c += 3;
+				} else {
+					c += 2;
+				}
+
+				if (padding == 0) {
+					ucp[2] = (b64[3] << 2 & 0xFC) |
+					    (b64[4] >> 4 & 0x03);
+					ucp[3] = (b64[4] << 4 & 0xF0) |
+					    (b64[5] >> 2 & 0x0F);
+					b64last = b64[5];
+					padding = 2;
+				} else if (padding == 2) {
+					ucp[2] = (b64last << 6 & 0xC0) |
+					    (b64[3] & 0x3F);
+					ucp[3] = (b64[4] << 2 & 0xFC) |
+					    (b64[5] >> 4 & 0x03);
+					b64last = b64[5];
+					padding = 4;
+				} else if (padding == 4) {
+					ucp[2] = (b64last << 4 & 0xF0) |
+					    (b64[3] >> 2 & 0x0F);
+					ucp[3] = (b64[3] << 6 & 0xC0) |
+					    (b64[4] & 0x3F);
+					padding = 0;
+				}
+
+				ucp[0] &= 0xFF - 0xDF;
+				ucptemp = ((ucp[0] << 2 & 0x0C) |
+				    (ucp[1] >> 6 & 0x03)) + 0x1;
+				ucp[2] &= 0xFF - 0xDC;
+
+				*out++ = 0xF0 | (ucptemp >> 2 & 0x03);
+				*out++ = 0x80 | (ucptemp << 4 & 0x30) |
+				    (ucp[1] >> 2 & 0x0F);
+				*out++ = 0x80 | (ucp[1] << 4 & 0x30) |
+				    (ucp[2] << 2 & 0x0C) |
+				    (ucp[3] >> 6 & 0x03);
+				*out++ = 0x80 | (ucp[3] & 0x3F);
+			}
+			if (*c == '-') {
+				c++;
+				break;
+			}
+		} while (*c != '-');
+	}
+	*out = '\0';
+
 	debug("conversion: '%s' <- '%s'\n", nbuf.data, cbuf.data);

 	return nbuf.data;