]> code.citadel.org Git - citadel.git/blobdiff - webcit/messages.c
* HTML messages in foreign character sets are now converted to UTF-8 for
[citadel.git] / webcit / messages.c
index ab02ee04a1bdab7a69ca0b6f9e510a9f9863bb50..370454c792a02a3eb3804417b9f1bec73d4da6a0 100644 (file)
 #include <stdarg.h>
 #include <pthread.h>
 #include <signal.h>
+
+#ifdef HAVE_ICONV
+#include <iconv.h>
+#endif
+
 #include "webcit.h"
 #include "vcard.h"
 #include "webserver.h"
@@ -37,6 +42,74 @@ struct addrbookent {
 };
 
 
+
+#ifdef HAVE_ICONV
+/*
+ * Decode RFC 2047 "encoded-words" in a header string, in place, converting
+ * each one to UTF-8.  Example input:
+ * =?koi8-r?B?78bP0s3Mxc7JxSDXz9rE1dvO2c3JINvB0sHNySDP?=
+ *
+ * buf  NUL-terminated string, rewritten in place.  Every encoded-word is
+ *      replaced by its UTF-8 form, or by the literal text "(unreadable)" when
+ *      iconv has no converter for the named character set.  Scanning stops at
+ *      the first "=?" that does not begin a well-formed encoded-word; the
+ *      rest of the string is then left untouched.
+ *
+ * NOTE(review): the rewritten string is capped at 1023 bytes by newbuf, but it
+ * is copied back with strcpy() because the size of the caller's buffer is not
+ * known here.  UTF-8 output can be longer than the encoded input, so confirm
+ * that every caller passes a buffer that is large enough.
+ */
+void utf8ify_rfc822_string(char *buf) {
+       char *start, *end;
+       char *sep;                    /* Walks the '?' separators of one word */
+       char newbuf[1024];
+       char charset[128];
+       char encoding[16];
+       char istr[1024];
+       char decoded[1024];           /* Payload after base64 / QP decoding   */
+       char converted[1024];         /* Payload after conversion to UTF-8    */
+       iconv_t ic;
+       char *ibuf;                   /* Cursor into the bytes to convert     */
+       char *obuf;                   /* Cursor into the converted bytes      */
+       size_t ibuflen;               /* Bytes left to convert                */
+       size_t obuflen;               /* Bytes left in the output buffer      */
+
+       while ((start = strstr(buf, "=?")) != NULL) {
+
+               /* An encoded-word is "=?charset?encoding?text?=".  Locate the
+                * terminator only AFTER the third '?': searching from the
+                * beginning would match the "?=" inside "?Q?=E9..." whenever
+                * Q-encoded text starts with an escape.
+                */
+               sep = strchr(start + 2, '?');
+               if (sep != NULL) {
+                       sep = strchr(sep + 1, '?');
+               }
+               end = NULL;
+               if (sep != NULL) {
+                       end = strstr(sep + 1, "?=");
+               }
+               if (end == NULL) {
+                       break;          /* malformed; leave the rest alone */
+               }
+
+               extract_token(charset, start, 1, '?', sizeof charset);
+               extract_token(encoding, start, 2, '?', sizeof encoding);
+               extract_token(istr, start, 3, '?', sizeof istr);
+
+               *start = 0;             /* buf now ends where the word began */
+               end += 2;               /* step over the closing "?="        */
+
+               if (!strcasecmp(encoding, "B")) {       /* base64 */
+                       ibuflen = CtdlDecodeBase64(decoded, istr, strlen(istr));
+               }
+               else if (!strcasecmp(encoding, "Q")) {  /* quoted-printable */
+                       ibuflen = CtdlDecodeQuotedPrintable(decoded, istr, strlen(istr));
+               }
+               else {
+                       /* Unknown encoding: pass the payload through as-is */
+                       strcpy(decoded, istr);
+                       ibuflen = strlen(istr);
+               }
+
+               ic = iconv_open("UTF-8", charset);
+               if (ic != (iconv_t)(-1) ) {
+                       ibuf = decoded;
+                       obuf = converted;
+                       /* Hold back one byte so the terminator always fits */
+                       obuflen = sizeof converted - 1;
+
+                       /* Best effort: if iconv() stops early we keep whatever
+                        * it managed to convert.  It advances obuf past the
+                        * bytes it wrote, so obuf is where the NUL belongs.
+                        */
+                       iconv(ic, &ibuf, &ibuflen, &obuf, &obuflen);
+                       *obuf = 0;
+                       iconv_close(ic);
+
+                       snprintf(newbuf, sizeof newbuf, "%s%s%s", buf, converted, end);
+               }
+               else {
+                       snprintf(newbuf, sizeof newbuf, "%s(unreadable)%s", buf, end);
+               }
+
+               strcpy(buf, newbuf);
+       }
+
+}
+#endif
+
+
 /*
  * Look for URL's embedded in a buffer and make them linkable.  We use a
  * target window in order to keep the BBS session in its own window.
@@ -425,6 +498,14 @@ void read_message(long msgnum) {
        char vcard_partnum[256];
        char cal_partnum[256];
        char *part_source = NULL;
+#ifdef HAVE_ICONV
+       iconv_t ic = (iconv_t)(-1) ;
+       char *ibuf;                   /* Buffer of characters to be converted */
+       char *obuf;                   /* Buffer for converted characters      */
+       size_t ibuflen;               /* Length of input buffer               */
+       size_t obuflen;               /* Length of output buffer              */
+       char *osav;                   /* Saved pointer to output buffer       */
+#endif
 
        strcpy(from, "");
        strcpy(node, "");
@@ -470,6 +551,9 @@ void read_message(long msgnum) {
                if (!strncasecmp(buf, "from=", 5)) {
                        strcpy(from, &buf[5]);
                        wprintf("from <A HREF=\"/showuser&who=");
+#ifdef HAVE_ICONV
+                       utf8ify_rfc822_string(from);
+#endif
                        urlescputs(from);
                        wprintf("\">");
                        escputs(from);
@@ -575,6 +659,9 @@ void read_message(long msgnum) {
        }
 
        wprintf("</SPAN>");
+#ifdef HAVE_ICONV
+       utf8ify_rfc822_string(m_subject);
+#endif
        if (strlen(m_subject) > 0) {
                wprintf("<br />"
                        "<SPAN CLASS=\"message_subject\">"
@@ -640,11 +727,16 @@ void read_message(long msgnum) {
                }
        }
 
-       /*
-       wprintf("Content-type: %s<br />\n", mime_content_type);
-       wprintf("Charset: %s<br />\n", mime_charset);
-       PUT CHARSET TRANSLATOR HERE
-       */
+       /* Set up a character set conversion if we need to (and if we can) */
+#ifdef HAVE_ICONV
+       if ( (strcasecmp(mime_charset, "us-ascii"))
+          && (strcasecmp(mime_charset, "UTF-8")) ) {
+               ic = iconv_open("UTF-8", mime_charset);
+               if (ic == (iconv_t)(-1) ) {
+                       lprintf(5, "iconv_open() failed: %s\n", strerror(errno));
+               }
+       }
+#endif
 
        /* Messages in legacy Citadel variformat get handled thusly... */
        if (!strcasecmp(mime_content_type, "text/x-citadel-variformat")) {
@@ -656,6 +748,21 @@ void read_message(long msgnum) {
                while (serv_getln(buf, sizeof buf), strcmp(buf, "000")) {
                        if (buf[strlen(buf)-1] == '\n') buf[strlen(buf)-1] = 0;
                        if (buf[strlen(buf)-1] == '\r') buf[strlen(buf)-1] = 0;
+
+#ifdef HAVE_ICONV
+                       if (ic != (iconv_t)(-1) ) {
+                               ibuf = buf;
+                               ibuflen = strlen(ibuf);
+                               obuflen = SIZ;
+                               obuf = (char *) malloc(obuflen);
+                               osav = obuf;
+                               iconv(ic, &ibuf, &ibuflen, &obuf, &obuflen);
+                               osav[SIZ-obuflen] = 0;
+                               safestrncpy(buf, osav, sizeof buf);
+                               free(osav);
+                       }
+#endif
+
                        while ((strlen(buf) > 0) && (isspace(buf[strlen(buf) - 1])))
                                buf[strlen(buf) - 1] = 0;
                        if ((bq == 0) &&
@@ -677,7 +784,7 @@ void read_message(long msgnum) {
 
        else /* HTML is fun, but we've got to strip it first */
        if (!strcasecmp(mime_content_type, "text/html")) {
-               output_html();
+               output_html(mime_charset);
        }
 
        /* Unknown weirdness */
@@ -733,6 +840,12 @@ ENDBODY:
        /* end everythingamundo table */
        wprintf("</TD></TR></TABLE>\n");
        wprintf("</div><br />\n");
+
+#ifdef HAVE_ICONV
+       if (ic != (iconv_t)(-1) ) {
+               iconv_close(ic);
+       }
+#endif
 }
 
 
@@ -763,6 +876,10 @@ void summarize_message(long msgnum, int is_new) {
                if (!strncasecmp(buf, "subj=", 5)) {
                        if (strlen(&buf[5]) > 0) {
                                strcpy(summ.subj, &buf[5]);
+#ifdef HAVE_ICONV
+                               /* Handle subjects with RFC2047 encoding */
+                               utf8ify_rfc822_string(summ.subj);
+#endif
                                if (strlen(summ.subj) > 75) {
                                        strcpy(&summ.subj[72], "...");
                                }
@@ -793,6 +910,10 @@ void summarize_message(long msgnum, int is_new) {
                }
        }
        
+#ifdef HAVE_ICONV
+       /* Handle senders with RFC2047 encoding */
+       utf8ify_rfc822_string(summ.from);
+#endif
        if (strlen(summ.from) > 25) {
                strcpy(&summ.from[22], "...");
        }