From 4c6e691894791003cc46faa150674bf2f8a11c6d Mon Sep 17 00:00:00 2001 From: Art Cancro Date: Sat, 24 Feb 2007 04:58:54 +0000 Subject: [PATCH] Updated the html2html filter. When we encounter a charset declaration within the meta tags, the charset 'unicode' is manually aliased to 'UTF-8'. GNU iconv wants to treat 'unicode' as an alias for 'UTF-16', but messages generated by brain-damaged MS Exchange webmail declare 'unicode' when they really mean 'UTF-8'. This change only applies to meta tags; it is not a program-wide alias. --- webcit/html2html.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/webcit/html2html.c b/webcit/html2html.c index 8799aaa2a..35fffd5d2 100644 --- a/webcit/html2html.c +++ b/webcit/html2html.c @@ -59,6 +59,18 @@ void extract_charset_from_meta(char *charset, char *meta_http_equiv, char *meta_ striplt(buf); if (!strncasecmp(buf, "charset=", 8)) { strcpy(charset, &buf[8]); + + /* + * The brain-damaged webmail program in Microsoft Exchange declares + * a charset of "unicode" when they really mean "UTF-8". GNU iconv + * treats "unicode" as an alias for "UTF-16" so we have to manually + * fix this here, otherwise messages generated in Exchange webmail + * show up as a big pile of weird characters. + */ + if (!strcasecmp(charset, "unicode")) { + strcpy(charset, "UTF-8"); + } + } } -- 2.30.2