X-Git-Url: https://code.citadel.org/?a=blobdiff_plain;f=libcitadel%2Flib%2Fstringbuf.c;h=e28e9f5aff0e39e00cb23f8c0ab5d5c8dbdea482;hb=8ec5a3f41a12fd1a6b890bd3726974845f1f3b6e;hp=e2dae77306fc523734fb38282cc505de2b34f8aa;hpb=56cba54ee7421b9b886bbe1eab828f22475fb9c4;p=citadel.git

diff --git a/libcitadel/lib/stringbuf.c b/libcitadel/lib/stringbuf.c
index e2dae7730..e28e9f5af 100644
--- a/libcitadel/lib/stringbuf.c
+++ b/libcitadel/lib/stringbuf.c
@@ -1,3 +1,4 @@
+#include "../sysdep.h"
 #include <ctype.h>
 #include <errno.h>
 #include <string.h>
@@ -10,6 +11,20 @@
 #include <stdarg.h>
 #include "libcitadel.h"
 
+#ifdef HAVE_ICONV
+#include <iconv.h>
+#endif
+
+#ifdef HAVE_ZLIB
+#include <zlib.h>
+#endif
+
+
+#ifdef HAVE_ZLIB
+#include <zlib.h>
+int ZEXPORT compress_gzip(Bytef * dest, size_t * destLen,
+                          const Bytef * source, uLong sourceLen, int level);
+#endif
 
 /**
  * Private Structure for the Stringbuffer
@@ -1297,7 +1312,7 @@ int CompressBuffer(StrBuf *Buf)
 			  &compressed_len,
 			  (Bytef *) Buf->buf,
 			  (uLongf) Buf->BufUsed, Z_BEST_SPEED) == Z_OK) {
-		if (!ConstBuf)
+		if (!Buf->ConstBuf)
 			free(Buf->buf);
 		Buf->buf = compressed_data;
 		Buf->BufUsed = compressed_len;
@@ -1462,3 +1477,258 @@ void StrBufReplaceChars(StrBuf *buf, char search, char replace)
 			buf->buf[i] = replace;
 
 }
+
+
+
+/*
+ * Wrapper around iconv_open()
+ * Our version adds aliases for non-standard Microsoft charsets
+ * such as 'MS950', aliasing them to names like 'CP950'
+ *
+ * tocode	Target encoding
+ * fromcode	Source encoding
+ */
+static iconv_t ctdl_iconv_open(const char *tocode, const char *fromcode)
+{
+	iconv_t ic = (iconv_t)(-1) ;
+	ic = iconv_open(tocode, fromcode);
+	if (ic == (iconv_t)(-1) ) {
+		char alias_fromcode[64];
+		if ( (strlen(fromcode) == 5) && (!strncasecmp(fromcode, "MS", 2)) ) {
+			safestrncpy(alias_fromcode, fromcode, sizeof alias_fromcode);
+			alias_fromcode[0] = 'C';
+			alias_fromcode[1] = 'P';
+			ic = iconv_open(tocode, alias_fromcode);
+		}
+	}
+	return(ic);
+}
+
+
+#ifdef HAVE_ICONV
+
+static inline char *FindNextEnd (StrBuf *Buf, char *bptr)
+{
+	char * end;
+	/* Find the next ?Q? */
+	if (Buf->BufUsed - (bptr - Buf->buf)  < 6)
+		return NULL;
+
+	end = strchr(bptr + 2, '?');
+
+	if (end == NULL)
+		return NULL;
+
+	if ((Buf->BufUsed - (end - Buf->buf) > 3) &&
+	    ((*(end + 1) == 'B') || (*(end + 1) == 'Q')) && 
+	    (*(end + 2) == '?')) {
+		/* skip on to the end of the cluster, the next ?= */
+		end = strstr(end + 3, "?=");
+	}
+	else
+		/* sort of half valid encoding, try to find an end. */
+		end = strstr(bptr, "?=");
+	return end;
+}
+
+
+/*
+ * Handle subjects with RFC2047 encoding such as:
+ * =?koi8-r?B?78bP0s3Mxc7JxSDXz9rE1dvO2c3JINvB0sHNySDP?=
+ */
+void StrBuf_RFC822_to_Utf8(StrBuf **Buf, const StrBuf* DefaultCharset) {
+	StrBuf *TmpBuf, *ConvertBuf, *ConvertBuf2;
+	StrBuf StaticBuf;
+	char *start, *end, *next, *nextend, *ptr;
+	char charset[128];
+	char encoding[16];
+	iconv_t ic = (iconv_t)(-1) ;
+	char *ibuf;			/**< Buffer of characters to be converted */
+	char *obuf;			/**< Buffer for converted characters */
+	size_t ibuflen;			/**< Length of input buffer */
+	size_t obuflen;			/**< Length of output buffer */
+	char *isav;			/**< Saved pointer to input buffer */
+	
+	const char *eptr;
+	int passes = 0;
+	int i, len, delta;
+	int illegal_non_rfc2047_encoding = 0;
+
+	/* Sometimes, badly formed messages contain strings which were simply
+	 *  written out directly in some foreign character set instead of
+	 *  using RFC2047 encoding.  This is illegal but we will attempt to
+	 *  handle it anyway by converting from a user-specified default
+	 *  charset to UTF-8 if we see any nonprintable characters.
+	 */
+	TmpBuf = NewStrBufPlain(NULL, StrLength(*Buf));
+
+	len = StrLength(*Buf);
+	for (i=0; i<(*Buf)->BufUsed; ++i) {
+		if (((*Buf)->buf[i] < 32) || ((*Buf)->buf[i] > 126)) {
+			illegal_non_rfc2047_encoding = 1;
+			break;
+		}
+	}
+
+	if (illegal_non_rfc2047_encoding) {
+		if ( (strcasecmp(ChrPtr(DefaultCharset), "UTF-8")) && 
+		     (strcasecmp(ChrPtr(DefaultCharset), "us-ascii")) ) {
+			ic = ctdl_iconv_open("UTF-8", ChrPtr(DefaultCharset));
+			if (ic != (iconv_t)(-1) ) {
+				ibuf = (*Buf)->buf;
+				obuf = TmpBuf->buf;
+				ibuflen = (*Buf)->BufUsed;
+				obuflen = TmpBuf->BufSize;
+
+				iconv(ic, &ibuf, &ibuflen, &obuf, &obuflen);
+				TmpBuf->BufUsed = TmpBuf->BufSize - obuflen;
+				TmpBuf->buf[TmpBuf->BufUsed] = '\0';
+
+				FreeStrBuf(Buf);
+				*Buf = TmpBuf;
+				TmpBuf = NewStrBufPlain(NULL, StrLength(*Buf));
+
+				iconv_close(ic);
+			}
+		}
+	}
+
+	/* pre evaluate the first pair */
+	nextend = end = NULL;
+	len = StrLength(*Buf);
+	start = strstr((*Buf)->buf, "=?");
+	eptr = (*Buf)->buf + (*Buf)->BufUsed;
+	if (start != NULL) 
+		end = FindNextEnd (*Buf, start);
+
+	while ((start != NULL) && 
+	       (end != NULL) && 
+	       (start < eptr) && 
+	       (end < eptr))
+	{
+		next = strstr(end, "=?");
+		nextend = NULL;
+		if ((next != NULL) && 
+		    (next < eptr))
+			nextend = FindNextEnd(*Buf, next);
+		if (nextend == NULL)
+			next = NULL;
+
+		/* did we find two partitions */
+		if ((next != NULL) && 
+		    ((next - end) > 2))
+		{
+			ptr = end + 2;
+			while ((ptr < next) && 
+			       (isspace(*ptr) ||
+				(*ptr == '\r') ||
+				(*ptr == '\n') || 
+				(*ptr == '\t')))
+				ptr ++;
+			/* did we find a gab just filled with blanks? */
+			if (ptr == next)
+			{
+				memmove (end + 2,
+					 next,
+					 len - (next - start));
+				
+				/* now terminate the gab at the end */
+				delta = (next - end) - 2;
+				(*Buf)->BufUsed -= delta;
+				(*Buf)->buf[(*Buf)->BufUsed] = '\0';
+
+				/* move next to its new location. */
+				next -= delta;
+				nextend -= delta;
+			}
+		}
+		/* our next-pair is our new first pair now. */
+		start = next;
+		end = nextend;
+	}
+
+	ConvertBuf = NewStrBufPlain(NULL, StrLength(*Buf));
+	ConvertBuf2 = NewStrBufPlain(NULL, StrLength(*Buf));
+	/* Now we handle foreign character sets properly encoded
+	 * in RFC2047 format.
+	 */
+	while (start=strstr((*Buf)->buf, "=?"), 
+	       end=FindNextEnd((*Buf), ((start != NULL)? start : (*Buf)->buf)),
+			       ((start != NULL) && 
+				(end != NULL) && 
+				(end > start)) )
+	{
+		StaticBuf.buf = start;
+		StaticBuf.BufUsed = (*Buf)->BufUsed - ((*Buf)->buf - start);
+		StaticBuf.BufSize = (*Buf)->BufSize - ((*Buf)->buf - start);
+		extract_token(charset, start, 1, '?', sizeof charset);
+		extract_token(encoding, start, 2, '?', sizeof encoding);
+		StrBufExtract_token(ConvertBuf, &StaticBuf, 3, '?');
+
+		if (!strcasecmp(encoding, "B")) {	/**< base64 */
+			ConvertBuf2->BufUsed = CtdlDecodeBase64(ConvertBuf2->buf, 
+								ConvertBuf->buf, 
+								ConvertBuf->BufUsed);
+		}
+		else if (!strcasecmp(encoding, "Q")) {	/**< quoted-printable */
+			long pos;
+			
+			pos = 0;
+			while (pos < ConvertBuf->BufUsed)
+			{
+				if (ConvertBuf->buf[pos] == '_') 
+					ConvertBuf->buf[pos] = ' ';
+				pos++;
+			}
+
+			ConvertBuf2->BufUsed = CtdlDecodeQuotedPrintable(
+				ConvertBuf2->buf, 
+				ConvertBuf->buf,
+				ConvertBuf->BufUsed);
+		}
+		else {
+			StrBufAppendBuf(ConvertBuf2, ConvertBuf, 0);
+		}
+
+		ic = ctdl_iconv_open("UTF-8", charset);
+		if (ic != (iconv_t)(-1) ) {
+			ibuf = ConvertBuf2->buf;
+			obuf = ConvertBuf->buf;
+			ibuf = ConvertBuf2->buf;
+			obuflen = ConvertBuf->BufSize;
+			ibuflen = ConvertBuf2->BufUsed;
+
+			iconv(ic, &ibuf, &ibuflen, &obuf, &obuflen);
+			ConvertBuf->BufUsed = ConvertBuf->BufSize - obuflen;
+			ConvertBuf->buf[ConvertBuf->BufUsed] = '\0';
+
+			StrBufAppendBuf(TmpBuf, ConvertBuf, 0);
+			iconv_close(ic);
+		}
+		else {
+
+			StrBufAppendBufPlain(TmpBuf, HKEY("(unreadable)"), 0);
+		}
+
+		free(isav);
+
+		/*
+		 * Since spammers will go to all sorts of absurd lengths to get their
+		 * messages through, there are LOTS of corrupt headers out there.
+		 * So, prevent a really badly formed RFC2047 header from throwing
+		 * this function into an infinite loop.
+		 */
+		++passes;
+		if (passes > 20)  { 
+			FreeStrBuf(Buf);
+			*Buf = TmpBuf;
+			return;
+		}
+	}
+	FreeStrBuf(Buf);
+	*Buf = TmpBuf;
+}
+#else
+void StrBuf_RFC822_to_Utf8(StrBuf **Buf, const StrBuf* DefaultCharset) {};
+
+#endif