From 8ec5a3f41a12fd1a6b890bd3726974845f1f3b6e Mon Sep 17 00:00:00 2001 From: =?utf8?q?Wilfried=20G=C3=B6esgens?= Date: Sun, 5 Oct 2008 15:42:29 +0000 Subject: [PATCH] * add autoheader for zlib and iconv detection * oops. gzip code was never enabled... * add raw iron stuff for de-qp strbuffer. --- libcitadel/bootstrap | 1 + libcitadel/configure.in | 75 ++++++++++ libcitadel/lib/libcitadel.h | 3 +- libcitadel/lib/stringbuf.c | 272 +++++++++++++++++++++++++++++++++++- 4 files changed, 349 insertions(+), 2 deletions(-) diff --git a/libcitadel/bootstrap b/libcitadel/bootstrap index bf4277220..9d70ce500 100755 --- a/libcitadel/bootstrap +++ b/libcitadel/bootstrap @@ -1,3 +1,4 @@ #!/bin/sh autoconf +autoheader \ No newline at end of file diff --git a/libcitadel/configure.in b/libcitadel/configure.in index 1d82e741a..b70532b8e 100755 --- a/libcitadel/configure.in +++ b/libcitadel/configure.in @@ -72,6 +72,79 @@ if test "x$ok_zlib" = xyes ; then AC_DEFINE(HAVE_ZLIB,[],[whether we have zlib]) fi +AC_CHECK_HEADERS(iconv.h) + + +dnl Here is the check for a libc integrated iconv +AC_ARG_ENABLE(iconv, + [ --disable-iconv do not use iconv charset conversion], + ok_iconv=no, ok_iconv=yes) + +AC_MSG_CHECKING(Checking to see if your system supports iconv) +AC_TRY_RUN([ + #include <iconv.h> + main() { + iconv_t ic = (iconv_t)(-1) ; + ic = iconv_open("UTF-8", "us-ascii"); + iconv_close(ic); + exit(0); + } + ], + [ + ok_iconv=yes + AC_MSG_RESULT([yes]) + ], + [ + ok_iconv=no + AC_MSG_RESULT([no]) + ] +) + +dnl Check for iconv in external libiconv +if test "$ok_iconv" = no; then + AC_MSG_CHECKING(Checking for an external libiconv) + OLD_LDFLAGS="$LDFLAGS" + LDFLAGS="$LDFLAGS -liconv" + AC_TRY_RUN([ + #include <iconv.h> + main() { + iconv_t ic = (iconv_t)(-1) ; + ic = iconv_open("UTF-8", "us-ascii"); + iconv_close(ic); + } + ], + [ + ok_iconv=yes + AC_MSG_RESULT([yes]) + ], + [ + ok_iconv=no + LDFLAGS="$OLD_LDFLAGS" + AC_MSG_RESULT([no]) + ] + ) +fi +if test "$ok_iconv" != "no"; then + 
AC_MSG_RESULT(libcitadel will be built with character set conversion.) + AC_DEFINE(HAVE_ICONV,[],[whether we have iconv for charset conversion]) +else + AC_MSG_RESULT(libcitadel will be built without character set conversion.) +fi + + +AC_ARG_WITH(with_zlib, [ --with-zlib use zlib compression if present]) +dnl Checks for the zlib compression library. +if test "x$with_zlib" != xno ; then + AC_CHECK_HEADERS(zlib.h, + [AC_CHECK_LIB(z, zlibVersion, + [ok_zlib=yes],, + )]) +fi + +if test "x$ok_zlib" = xyes ; then + LIBS="-lz $LIBS" + AC_DEFINE(HAVE_ZLIB,[],[whether we have zlib]) +fi dnl Checks for typedefs, structures, and compiler characteristics. @@ -81,6 +154,7 @@ AC_TYPE_SIZE_T AC_CHECK_FUNCS(memmove bcopy) AC_CONFIG_FILES(Makefile libcitadel.pc) +AC_CONFIG_HEADER(sysdep.h) AC_OUTPUT abs_srcdir="`cd $srcdir && pwd`" @@ -89,3 +163,4 @@ if test "$abs_srcdir" != "$abs_builddir"; then make mkdir-init fi echo 'zlib compression: ' $ok_zlib +echo 'Character set conversion support:' $ok_iconv diff --git a/libcitadel/lib/libcitadel.h b/libcitadel/lib/libcitadel.h index 35438cdd7..f47c3cc3b 100644 --- a/libcitadel/lib/libcitadel.h +++ b/libcitadel/lib/libcitadel.h @@ -261,6 +261,7 @@ void StrBufEUid_escapize(StrBuf *target, const StrBuf *source); void StrBufReplaceChars(StrBuf *buf, char search, char replace); int CompressBuffer(StrBuf *Buf); +void StrBuf_RFC822_to_Utf8(StrBuf **Buf, const StrBuf* DefaultCharset); int StrBufDecodeBase64(StrBuf *Buf); int StrBufRFC2047encode(StrBuf **target, const StrBuf *source); #define LB (1) /* Internal escape chars */ @@ -361,7 +362,7 @@ long StrBufUnescape(StrBuf *Buf, int StripBlanks); /* * Hash list implementation for Citadel */ -#define HKEY(a) a, sizeof(a) - 1 +#define HKEY(a) a, (sizeof(a) - 1) typedef struct HashList HashList; typedef struct HashKey HashKey; diff --git a/libcitadel/lib/stringbuf.c b/libcitadel/lib/stringbuf.c index e2dae7730..e28e9f5af 100644 --- a/libcitadel/lib/stringbuf.c +++ b/libcitadel/lib/stringbuf.c @@ 
-1,3 +1,4 @@ +#include "../sysdep.h" #include #include #include @@ -10,6 +11,20 @@ #include #include "libcitadel.h" +#ifdef HAVE_ICONV +#include <iconv.h> +#endif + +#ifdef HAVE_ZLIB +#include <zlib.h> +#endif + + +#ifdef HAVE_ZLIB +#include <zlib.h> +int ZEXPORT compress_gzip(Bytef * dest, size_t * destLen, + const Bytef * source, uLong sourceLen, int level); +#endif /** * Private Structure for the Stringbuffer @@ -1297,7 +1312,7 @@ int CompressBuffer(StrBuf *Buf) &compressed_len, (Bytef *) Buf->buf, (uLongf) Buf->BufUsed, Z_BEST_SPEED) == Z_OK) { - if (!ConstBuf) + if (!Buf->ConstBuf) free(Buf->buf); Buf->buf = compressed_data; Buf->BufUsed = compressed_len; @@ -1462,3 +1477,258 @@ void StrBufReplaceChars(StrBuf *buf, char search, char replace) buf->buf[i] = replace; } + + + +/* + * Wrapper around iconv_open() + * Our version adds aliases for non-standard Microsoft charsets + * such as 'MS950', aliasing them to names like 'CP950' + * + * tocode Target encoding + * fromcode Source encoding + */ +static iconv_t ctdl_iconv_open(const char *tocode, const char *fromcode) +{ + iconv_t ic = (iconv_t)(-1) ; + ic = iconv_open(tocode, fromcode); + if (ic == (iconv_t)(-1) ) { + char alias_fromcode[64]; + if ( (strlen(fromcode) == 5) && (!strncasecmp(fromcode, "MS", 2)) ) { + safestrncpy(alias_fromcode, fromcode, sizeof alias_fromcode); + alias_fromcode[0] = 'C'; + alias_fromcode[1] = 'P'; + ic = iconv_open(tocode, alias_fromcode); + } + } + return(ic); +} + + +#ifdef HAVE_ICONV + +static inline char *FindNextEnd (StrBuf *Buf, char *bptr) +{ + char * end; + /* Find the next ?Q? */ + if (Buf->BufUsed - (bptr - Buf->buf) < 6) + return NULL; + + end = strchr(bptr + 2, '?'); + + if (end == NULL) + return NULL; + + if ((Buf->BufUsed - (end - Buf->buf) > 3) && + ((*(end + 1) == 'B') || (*(end + 1) == 'Q')) && + (*(end + 2) == '?')) { + /* skip on to the end of the cluster, the next ?= */ + end = strstr(end + 3, "?="); + } + else + /* sort of half valid encoding, try to find an end. 
*/ + end = strstr(bptr, "?="); + return end; +} + + +/* + * Handle subjects with RFC2047 encoding such as: + * =?koi8-r?B?78bP0s3Mxc7JxSDXz9rE1dvO2c3JINvB0sHNySDP?= + */ +void StrBuf_RFC822_to_Utf8(StrBuf **Buf, const StrBuf* DefaultCharset) { + StrBuf *TmpBuf, *ConvertBuf, *ConvertBuf2; + StrBuf StaticBuf; + char *start, *end, *next, *nextend, *ptr; + char charset[128]; + char encoding[16]; + iconv_t ic = (iconv_t)(-1) ; + char *ibuf; /**< Buffer of characters to be converted */ + char *obuf; /**< Buffer for converted characters */ + size_t ibuflen; /**< Length of input buffer */ + size_t obuflen; /**< Length of output buffer */ + char *isav; /**< Saved pointer to input buffer */ + + const char *eptr; + int passes = 0; + int i, len, delta; + int illegal_non_rfc2047_encoding = 0; + + /* Sometimes, badly formed messages contain strings which were simply + * written out directly in some foreign character set instead of + * using RFC2047 encoding. This is illegal but we will attempt to + * handle it anyway by converting from a user-specified default + * charset to UTF-8 if we see any nonprintable characters. 
+ */ + TmpBuf = NewStrBufPlain(NULL, StrLength(*Buf)); + + len = StrLength(*Buf); + for (i=0; i<(*Buf)->BufUsed; ++i) { + if (((*Buf)->buf[i] < 32) || ((*Buf)->buf[i] > 126)) { + illegal_non_rfc2047_encoding = 1; + break; + } + } + + if (illegal_non_rfc2047_encoding) { + if ( (strcasecmp(ChrPtr(DefaultCharset), "UTF-8")) && + (strcasecmp(ChrPtr(DefaultCharset), "us-ascii")) ) { + ic = ctdl_iconv_open("UTF-8", ChrPtr(DefaultCharset)); + if (ic != (iconv_t)(-1) ) { + ibuf = (*Buf)->buf; + obuf = TmpBuf->buf; + ibuflen = (*Buf)->BufUsed; + obuflen = TmpBuf->BufSize; + + iconv(ic, &ibuf, &ibuflen, &obuf, &obuflen); + TmpBuf->BufUsed = TmpBuf->BufSize - obuflen; + TmpBuf->buf[TmpBuf->BufUsed] = '\0'; + + FreeStrBuf(Buf); + *Buf = TmpBuf; + TmpBuf = NewStrBufPlain(NULL, StrLength(*Buf)); + + iconv_close(ic); + } + } + } + + /* pre evaluate the first pair */ + nextend = end = NULL; + len = StrLength(*Buf); + start = strstr((*Buf)->buf, "=?"); + eptr = (*Buf)->buf + (*Buf)->BufUsed; + if (start != NULL) + end = FindNextEnd (*Buf, start); + + while ((start != NULL) && + (end != NULL) && + (start < eptr) && + (end < eptr)) + { + next = strstr(end, "=?"); + nextend = NULL; + if ((next != NULL) && + (next < eptr)) + nextend = FindNextEnd(*Buf, next); + if (nextend == NULL) + next = NULL; + + /* did we find two partitions */ + if ((next != NULL) && + ((next - end) > 2)) + { + ptr = end + 2; + while ((ptr < next) && + (isspace(*ptr) || + (*ptr == '\r') || + (*ptr == '\n') || + (*ptr == '\t'))) + ptr ++; + /* did we find a gap just filled with blanks? */ + if (ptr == next) + { + memmove (end + 2, + next, + len - (next - start)); + + /* now terminate the gap at the end */ + delta = (next - end) - 2; + (*Buf)->BufUsed -= delta; + (*Buf)->buf[(*Buf)->BufUsed] = '\0'; + + /* move next to its new location. */ + next -= delta; + nextend -= delta; + } + } + /* our next-pair is our new first pair now. 
*/ + start = next; + end = nextend; + } + + ConvertBuf = NewStrBufPlain(NULL, StrLength(*Buf)); + ConvertBuf2 = NewStrBufPlain(NULL, StrLength(*Buf)); + /* Now we handle foreign character sets properly encoded + * in RFC2047 format. + */ + while (start=strstr((*Buf)->buf, "=?"), + end=FindNextEnd((*Buf), ((start != NULL)? start : (*Buf)->buf)), + ((start != NULL) && + (end != NULL) && + (end > start)) ) + { + StaticBuf.buf = start; + StaticBuf.BufUsed = (*Buf)->BufUsed - ((*Buf)->buf - start); + StaticBuf.BufSize = (*Buf)->BufSize - ((*Buf)->buf - start); + extract_token(charset, start, 1, '?', sizeof charset); + extract_token(encoding, start, 2, '?', sizeof encoding); + StrBufExtract_token(ConvertBuf, &StaticBuf, 3, '?'); + + if (!strcasecmp(encoding, "B")) { /**< base64 */ + ConvertBuf2->BufUsed = CtdlDecodeBase64(ConvertBuf2->buf, + ConvertBuf->buf, + ConvertBuf->BufUsed); + } + else if (!strcasecmp(encoding, "Q")) { /**< quoted-printable */ + long pos; + + pos = 0; + while (pos < ConvertBuf->BufUsed) + { + if (ConvertBuf->buf[pos] == '_') + ConvertBuf->buf[pos] = ' '; + pos++; + } + + ConvertBuf2->BufUsed = CtdlDecodeQuotedPrintable( + ConvertBuf2->buf, + ConvertBuf->buf, + ConvertBuf->BufUsed); + } + else { + StrBufAppendBuf(ConvertBuf2, ConvertBuf, 0); + } + + ic = ctdl_iconv_open("UTF-8", charset); + if (ic != (iconv_t)(-1) ) { + ibuf = ConvertBuf2->buf; + obuf = ConvertBuf->buf; + ibuf = ConvertBuf2->buf; + obuflen = ConvertBuf->BufSize; + ibuflen = ConvertBuf2->BufUsed; + + iconv(ic, &ibuf, &ibuflen, &obuf, &obuflen); + ConvertBuf->BufUsed = ConvertBuf->BufSize - obuflen; + ConvertBuf->buf[ConvertBuf->BufUsed] = '\0'; + + StrBufAppendBuf(TmpBuf, ConvertBuf, 0); + iconv_close(ic); + } + else { + + StrBufAppendBufPlain(TmpBuf, HKEY("(unreadable)"), 0); + } + + free(isav); + + /* + * Since spammers will go to all sorts of absurd lengths to get their + * messages through, there are LOTS of corrupt headers out there. 
+ * So, prevent a really badly formed RFC2047 header from throwing + * this function into an infinite loop. + */ + ++passes; + if (passes > 20) { + FreeStrBuf(Buf); + *Buf = TmpBuf; + return; + } + } + FreeStrBuf(Buf); + *Buf = TmpBuf; +} +#else +void StrBuf_RFC822_to_Utf8(StrBuf **Buf, const StrBuf* DefaultCharset) {}; + +#endif -- 2.30.2