]> code.citadel.org Git - citadel.git/blobdiff - libcitadel/lib/stringbuf.c
* add autoheader for zlib and iconv detection
[citadel.git] / libcitadel / lib / stringbuf.c
index 49e4e314c22329ddfc438a9af5e88d6849ce4c16..e28e9f5aff0e39e00cb23f8c0ab5d5c8dbdea482 100644 (file)
@@ -1,3 +1,4 @@
+#include "../sysdep.h"
 #include <ctype.h>
 #include <errno.h>
 #include <string.h>
 #include <stdarg.h>
 #include "libcitadel.h"
 
+#ifdef HAVE_ICONV
+#include <iconv.h>
+#endif
+
+#ifdef HAVE_ZLIB
+#include <zlib.h>
+#endif
+
+
+#ifdef HAVE_ZLIB
+#include <zlib.h>
+int ZEXPORT compress_gzip(Bytef * dest, size_t * destLen,
+                          const Bytef * source, uLong sourceLen, int level);
+#endif
 
 /**
  * Private Structure for the Stringbuffer
@@ -24,6 +39,10 @@ struct StrBuf {
 
 /** 
  * \Brief Cast operator to Plain String 
+ * Note: if the buffer is altered by StrBuf operations, this pointer may become
+ *  invalid, so don't hold on to it after altering the buffer!
+ *  Since this operation is cheap, prefer calling it again over risking
+ *  a stale pointer.
  * \param Str the string we want to get the c-string representation for
  * \returns the Pointer to the Content. Don't mess with it!
  */
@@ -44,6 +63,40 @@ inline int StrLength(const StrBuf *Str)
        return (Str != NULL) ? Str->BufUsed : 0;
 }
 
+/**
+ * \brief local utility function to resize the buffer
+ * \param Buf the buffer whose storage we should increase
+ * \param KeepOriginal nonzero: copy the current content into the new storage;
+ *        zero: start over with an empty buffer
+ * \param DestSize minimum size the new storage must have; if <= 0 the
+ *        current size is simply doubled
+ * \returns the new buffer size; -1 if the buffer is constant or the
+ *          allocation failed (the buffer is left untouched in both cases)
+ */
+static int IncreaseBuf(StrBuf *Buf, int KeepOriginal, int DestSize)
+{
+       char *NewBuf;
+       size_t NewSize;
+
+       if ((Buf == NULL) || Buf->ConstBuf)
+               return -1;
+
+       /* never start from 0, or the doubling loop below could not terminate */
+       NewSize = (Buf->BufSize > 0) ? ((size_t) Buf->BufSize * 2) : 2;
+
+       if (DestSize > 0)
+               while (NewSize < (size_t) DestSize)
+                       NewSize *= 2;
+
+       NewBuf = (char*) malloc(NewSize);
+       if (NewBuf == NULL)
+               return -1;
+
+       if (KeepOriginal && (Buf->BufUsed > 0))
+       {
+               memcpy(NewBuf, Buf->buf, Buf->BufUsed);
+               /* the new storage is at least twice the old one, so the
+                * terminator always fits behind the copied content */
+               NewBuf[Buf->BufUsed] = '\0';
+       }
+       else
+       {
+               NewBuf[0] = '\0';
+               Buf->BufUsed = 0;
+       }
+       free (Buf->buf);
+       Buf->buf = NewBuf;
+       /* record what was really allocated, not just "twice the old size" */
+       Buf->BufSize = NewSize;
+       return Buf->BufSize;
+}
+
+
 /**
  * Allocate a new buffer with default buffer size
  * \returns the new stringbuffer
@@ -166,39 +219,6 @@ StrBuf* _NewConstStrBuf(const char* StringConstant, size_t SizeOfStrConstant)
        return NewBuf;
 }
 
-/**
- * \brief local utility function to resize the buffer
- * \param Buf the buffer whichs storage we should increase
- * \param KeepOriginal should we copy the original buffer or just start over with a new one
- * \param DestSize what should fit in after?
- */
-static int IncreaseBuf(StrBuf *Buf, int KeepOriginal, int DestSize)
-{
-       char *NewBuf;
-       size_t NewSize = Buf->BufSize * 2;
-
-       if (Buf->ConstBuf)
-               return -1;
-               
-       if (DestSize > 0)
-               while (NewSize < DestSize)
-                       NewSize *= 2;
-
-       NewBuf= (char*) malloc(NewSize);
-       if (KeepOriginal && (Buf->BufUsed > 0))
-       {
-               memcpy(NewBuf, Buf->buf, Buf->BufUsed);
-       }
-       else
-       {
-               NewBuf[0] = '\0';
-               Buf->BufUsed = 0;
-       }
-       free (Buf->buf);
-       Buf->buf = NewBuf;
-       Buf->BufSize *= 2;
-       return Buf->BufSize;
-}
 
 /**
  * \brief flush the content of a Buf; keep its struct
@@ -699,9 +719,73 @@ void StrBufPrintf(StrBuf *Buf, const char *format, ...)
  */
 inline int StrBufNum_tokens(const StrBuf *source, char tok)
 {
+       /* A NULL buffer holds no tokens; this also keeps us from dereferencing it below. */
+       if (source == NULL)
+               return 0;
        return num_tokens(source->buf, tok);
 }
 
+/**
+ * \brief remove the n'th token from a string buffer
+ * One adjoining separator is removed together with the token: the one
+ * following it, or - for the last token - the one in front of it.
+ * \param Source StringBuffer to remove the token from
+ * \param parmnum number of the token to remove, counting from 0
+ * \param separator character that separates the tokens
+ * \returns 0 if the token was not found (or was empty); else the negated
+ *          length of the removed token (token start minus token end)
+ */
+int StrBufRemove_token(StrBuf *Source, int parmnum, char separator)
+{
+       int ReducedBy;
+       long TokenLen;
+       char *d, *s;            /* dest, source */
+       int count = 0;
+
+       if ((Source == NULL) || (Source->buf == NULL))
+               return 0;
+
+       /* Find desired parameter */
+       d = Source->buf;
+       while (count < parmnum) {
+               /* End of string, bail! */
+               if (!*d)
+                       return 0;       /* Parameter not found */
+               if (*d == separator) {
+                       count++;
+               }
+               d++;
+       }
+
+       /* Find the end of the token */
+       s = d;
+       while (*s && *s != separator) {
+               s++;
+       }
+
+       TokenLen = s - d;
+       ReducedBy = (int) (d - s);
+
+       /* Hack and slash */
+       if (*s) {
+               /* Token is followed by a separator: close the gap over both.
+                * The count covers everything behind the separator plus the
+                * terminating NUL. */
+               memmove(d, s + 1, Source->BufUsed - (s - Source->buf));
+               Source->BufUsed -= (TokenLen + 1);
+       }
+       else if (d == Source->buf) {
+               /* The token was the whole string */
+               *d = 0;
+               Source->BufUsed = 0;
+       }
+       else {
+               /* Last token: cut it off together with the separator before it */
+               *--d = 0;
+               Source->BufUsed -= (TokenLen + 1);
+       }
+       return ReducedBy;
+}
+
+
+
 /**
  * \brief a string tokenizer
  * \param dest Destination StringBuffer
@@ -876,6 +960,97 @@ int StrBufTCP_read_line(StrBuf *buf, int *fd, int append, const char **Error)
        return len - slen;
 }
 
+/**
+ * \brief Read a line from socket, keeping surplus input in a buffer
+ * closes the FD (and sets it to -1) if select() or read() fail
+ * \param Line receives the next line, without its trailing LF or CRLF
+ * \param buf holds input that was read but not handed out yet; new input is appended to it
+ * \param fd pointer to the filedescriptor to read
+ * \param timeout number of consecutive select() rounds without input after which we give up
+ * \param selectresolution seconds to wait in each select() round
+ * \param Error strerror() on error 
+ * \returns length of the line put into Line; -1 on error, on timeout,
+ *          or if the filedescriptor is in non-blocking mode
+ */
+int StrBufTCP_read_buffered_line(StrBuf *Line, 
+                                StrBuf *buf, 
+                                int *fd, 
+                                int timeout, 
+                                int selectresolution, 
+                                const char **Error)
+{
+       int len, rlen;
+       int nSuccessLess = 0;
+       fd_set rfds;
+       char *pch = NULL;
+       int fdflags;
+       struct timeval tv;
+
+       /* Maybe a complete line is still waiting from an earlier read */
+       if (buf->BufUsed > 0) {
+               pch = strchr(buf->buf, '\n');
+               if (pch != NULL) {
+                       rlen = 0;
+                       len = pch - buf->buf;
+                       if (len > 0 && (*(pch - 1) == '\r') )
+                               rlen ++;
+                       StrBufSub(Line, buf, 0, len - rlen);
+                       StrBufCutLeft(buf, len + 1);
+                       return len - rlen;
+               }
+       }
+       
+       if (buf->BufSize - buf->BufUsed < 10)
+               IncreaseBuf(buf, 1, -1);
+
+       /* this function refuses to work on non-blocking descriptors */
+       fdflags = fcntl(*fd, F_GETFL);
+       if ((fdflags & O_NONBLOCK) == O_NONBLOCK)
+               return -1;
+
+       while ((nSuccessLess < timeout) && (pch == NULL)) {
+               tv.tv_sec = selectresolution;
+               tv.tv_usec = 0;
+               
+               FD_ZERO(&rfds);
+               FD_SET(*fd, &rfds);
+               /* We want to read, so the set has to go in as the READ set;
+                * in the write slot select() reports writability instead and
+                * the timeout never protects the read() below. */
+               if (select(*fd + 1, &rfds, NULL, NULL, &tv) == -1) {
+                       *Error = strerror(errno);
+                       close (*fd);
+                       *fd = -1;
+                       return -1;
+               }               
+               if (FD_ISSET(*fd, &rfds)) {
+                       rlen = read(*fd, 
+                                   &buf->buf[buf->BufUsed], 
+                                   buf->BufSize - buf->BufUsed - 1);
+                       /* end of file (0) is handled like a read error */
+                       if (rlen < 1) {
+                               *Error = strerror(errno);
+                               close(*fd);
+                               *fd = -1;
+                               return -1;
+                       }
+                       nSuccessLess = 0;
+                       buf->BufUsed += rlen;
+                       buf->buf[buf->BufUsed] = '\0';
+                       if (buf->BufUsed + 10 > buf->BufSize) {
+                               IncreaseBuf(buf, 1, -1);
+                       }
+                       pch = strchr(buf->buf, '\n');
+                       continue;
+               }
+               nSuccessLess ++;
+       }
+       if (pch != NULL) {
+               rlen = 0;
+               len = pch - buf->buf;
+               if (len > 0 && (*(pch - 1) == '\r') )
+                       rlen ++;
+               StrBufSub(Line, buf, 0, len - rlen);
+               StrBufCutLeft(buf, len + 1);
+               return len - rlen;
+       }
+       return -1;
+
+}
+
+
 /**
  * \brief Input binary data from socket
  * flushes and closes the FD on error
@@ -965,6 +1140,19 @@ void StrBufCutRight(StrBuf *Buf, int nChars)
 }
 
 
+/**
+ * \brief convert the content of a buffer to upper case, in place
+ * \param Buf the buffer to convert; NULL or a buffer without storage is ignored
+ */
+void StrBufUpCase(StrBuf *Buf) 
+{
+       char *pch, *pche;
+
+       if ((Buf == NULL) || (Buf->buf == NULL))
+               return;
+
+       pch = Buf->buf;
+       pche = pch + Buf->BufUsed;
+       while (pch < pche) {
+               /* toupper() is only defined for unsigned char values (and EOF);
+                * a plain char may be negative for bytes above 127 */
+               *pch = (char) toupper((unsigned char) *pch);
+               pch ++;
+       }
+}
+
+
 /**
  * \brief unhide special chars hidden to the HTML escaper
  * \param target buffer to put the unescaped string in
@@ -1124,7 +1312,7 @@ int CompressBuffer(StrBuf *Buf)
                          &compressed_len,
                          (Bytef *) Buf->buf,
                          (uLongf) Buf->BufUsed, Z_BEST_SPEED) == Z_OK) {
-               if (!ConstBuf)
+               if (!Buf->ConstBuf)
                        free(Buf->buf);
                Buf->buf = compressed_data;
                Buf->BufUsed = compressed_len;
@@ -1289,3 +1477,258 @@ void StrBufReplaceChars(StrBuf *buf, char search, char replace)
                        buf->buf[i] = replace;
 
 }
+
+
+
+/*
+ * Wrapper around iconv_open()
+ * Our version adds aliases for non-standard Microsoft charsets
+ * such as 'MS950', aliasing them to names like 'CP950'
+ *
+ * tocode      Target encoding
+ * fromcode    Source encoding
+ */
+static iconv_t ctdl_iconv_open(const char *tocode, const char *fromcode)
+{
+       iconv_t ic = (iconv_t)(-1) ;
+       ic = iconv_open(tocode, fromcode);
+       if (ic == (iconv_t)(-1) ) {
+               char alias_fromcode[64];
+               if ( (strlen(fromcode) == 5) && (!strncasecmp(fromcode, "MS", 2)) ) {
+                       safestrncpy(alias_fromcode, fromcode, sizeof alias_fromcode);
+                       alias_fromcode[0] = 'C';
+                       alias_fromcode[1] = 'P';
+                       ic = iconv_open(tocode, alias_fromcode);
+               }
+       }
+       return(ic);
+}
+
+
+#ifdef HAVE_ICONV
+
+/*
+ * Locate the "?=" that closes the encoded word starting at bptr.
+ *
+ * Buf     buffer that bptr points into
+ * bptr    position of the "=?" opening the encoded word
+ *
+ * returns a pointer to the closing "?=", or NULL if fewer than 6 bytes
+ * are left behind bptr or no terminator could be found
+ */
+static inline char *FindNextEnd (StrBuf *Buf, char *bptr)
+{
+       char *mark;
+
+       if (Buf->BufUsed - (bptr - Buf->buf)  < 6)
+               return NULL;
+
+       /* the '?' that ends the charset name */
+       mark = strchr(bptr + 2, '?');
+       if (mark == NULL)
+               return NULL;
+
+       /* No proper "?B?" / "?Q?" behind the charset: sort of half valid
+        * encoding, so just look for any end from the start of the word. */
+       if ((Buf->BufUsed - (mark - Buf->buf) <= 3) ||
+           ((mark[1] != 'B') && (mark[1] != 'Q')) ||
+           (mark[2] != '?'))
+               return strstr(bptr, "?=");
+
+       /* skip on to the end of the cluster, the next ?= */
+       return strstr(mark + 3, "?=");
+}
+
+
+/*
+ * Handle subjects with RFC2047 encoding such as:
+ * =?koi8-r?B?78bP0s3Mxc7JxSDXz9rE1dvO2c3JINvB0sHNySDP?=
+ *
+ * Buf             in/out: the header value; the buffer it points to is
+ *                 freed and replaced by a new one holding the UTF-8 text
+ * DefaultCharset  charset to assume for raw non-ASCII input that is not
+ *                 RFC2047 encoded; may be NULL to skip that step
+ *
+ * Text outside of encoded words is copied unchanged; an encoded word with a
+ * charset iconv does not know is replaced by "(unreadable)".
+ *
+ * NOTE(review): iconv output is limited to the size of the scratch buffers,
+ * so a conversion that grows the text a lot may get truncated - confirm
+ * whether the buffers should be enlarged first.
+ */
+void StrBuf_RFC822_to_Utf8(StrBuf **Buf, const StrBuf* DefaultCharset) {
+       StrBuf *TmpBuf, *ConvertBuf, *ConvertBuf2;
+       StrBuf StaticBuf;
+       char *start, *end, *next, *nextend, *ptr;
+       char *cursor;                   /**< first input byte not yet copied to TmpBuf */
+       char charset[128];
+       char encoding[16];
+       iconv_t ic = (iconv_t)(-1) ;
+       char *ibuf;                     /**< Buffer of characters to be converted */
+       char *obuf;                     /**< Buffer for converted characters */
+       size_t ibuflen;                 /**< Length of input buffer */
+       size_t obuflen;                 /**< Length of output buffer */
+       
+       const char *eptr;
+       int passes = 0;
+       int i, delta;
+       int illegal_non_rfc2047_encoding = 0;
+
+       if ((Buf == NULL) || (*Buf == NULL))
+               return;
+
+       /* Sometimes, badly formed messages contain strings which were simply
+        *  written out directly in some foreign character set instead of
+        *  using RFC2047 encoding.  This is illegal but we will attempt to
+        *  handle it anyway by converting from a user-specified default
+        *  charset to UTF-8 if we see any nonprintable characters.
+        */
+       TmpBuf = NewStrBufPlain(NULL, StrLength(*Buf));
+       if (TmpBuf == NULL)
+               return;
+
+       for (i=0; i<(*Buf)->BufUsed; ++i) {
+               if (((*Buf)->buf[i] < 32) || ((*Buf)->buf[i] > 126)) {
+                       illegal_non_rfc2047_encoding = 1;
+                       break;
+               }
+       }
+
+       if (illegal_non_rfc2047_encoding && (DefaultCharset != NULL)) {
+               if ( (strcasecmp(ChrPtr(DefaultCharset), "UTF-8")) && 
+                    (strcasecmp(ChrPtr(DefaultCharset), "us-ascii")) ) {
+                       ic = ctdl_iconv_open("UTF-8", ChrPtr(DefaultCharset));
+                       if (ic != (iconv_t)(-1) ) {
+                               ibuf = (*Buf)->buf;
+                               obuf = TmpBuf->buf;
+                               ibuflen = (*Buf)->BufUsed;
+                               /* keep one byte for the terminator written below */
+                               obuflen = TmpBuf->BufSize - 1;
+
+                               iconv(ic, &ibuf, &ibuflen, &obuf, &obuflen);
+                               TmpBuf->BufUsed = obuf - TmpBuf->buf;
+                               TmpBuf->buf[TmpBuf->BufUsed] = '\0';
+
+                               FreeStrBuf(Buf);
+                               *Buf = TmpBuf;
+                               TmpBuf = NewStrBufPlain(NULL, StrLength(*Buf));
+
+                               iconv_close(ic);
+                               if (TmpBuf == NULL)
+                                       return;
+                       }
+               }
+       }
+
+       /* First pass: drop whitespace-only gaps between two adjacent
+        * encoded words.
+        */
+       /* pre evaluate the first pair */
+       nextend = end = NULL;
+       start = strstr((*Buf)->buf, "=?");
+       eptr = (*Buf)->buf + (*Buf)->BufUsed;
+       if (start != NULL) 
+               end = FindNextEnd (*Buf, start);
+
+       while ((start != NULL) && 
+              (end != NULL) && 
+              (start < eptr) && 
+              (end < eptr))
+       {
+               /* the next word can only start behind the "?=" end points at */
+               next = strstr(end + 2, "=?");
+               nextend = NULL;
+               if ((next != NULL) && 
+                   (next < eptr))
+                       nextend = FindNextEnd(*Buf, next);
+               if (nextend == NULL)
+                       next = NULL;
+
+               /* did we find two partitions */
+               if ((next != NULL) && 
+                   ((next - end) > 2))
+               {
+                       ptr = end + 2;
+                       /* isspace() also covers \r, \n and \t */
+                       while ((ptr < next) && 
+                              isspace((unsigned char) *ptr))
+                               ptr ++;
+                       /* did we find a gap just filled with blanks? */
+                       if (ptr == next)
+                       {
+                               delta = (next - end) - 2;
+                               /* move everything from next up to and including
+                                * the terminator at eptr */
+                               memmove (end + 2,
+                                        next,
+                                        (eptr - next) + 1);
+                               
+                               /* now terminate the buffer at its new end */
+                               (*Buf)->BufUsed -= delta;
+                               (*Buf)->buf[(*Buf)->BufUsed] = '\0';
+                               eptr -= delta;
+
+                               /* move next to its new location. */
+                               next -= delta;
+                               nextend -= delta;
+                       }
+               }
+               /* our next-pair is our new first pair now. */
+               start = next;
+               end = nextend;
+       }
+
+       ConvertBuf = NewStrBufPlain(NULL, StrLength(*Buf));
+       ConvertBuf2 = NewStrBufPlain(NULL, StrLength(*Buf));
+       if ((ConvertBuf == NULL) || (ConvertBuf2 == NULL)) {
+               /* out of memory: leave the input as it is */
+               if (ConvertBuf != NULL)
+                       FreeStrBuf(&ConvertBuf);
+               if (ConvertBuf2 != NULL)
+                       FreeStrBuf(&ConvertBuf2);
+               FreeStrBuf(&TmpBuf);
+               return;
+       }
+
+       /* Now we handle foreign character sets properly encoded
+        * in RFC2047 format.
+        *
+        * Since spammers will go to all sorts of absurd lengths to get their
+        * messages through, there are LOTS of corrupt headers out there.
+        * So we stop decoding after a fixed number of encoded words; whatever
+        * is left is copied unchanged.
+        */
+       cursor = (*Buf)->buf;
+       eptr = (*Buf)->buf + (*Buf)->BufUsed;
+       while (passes <= 20)
+       {
+               start = strstr(cursor, "=?");
+               if (start == NULL)
+                       break;
+               end = FindNextEnd(*Buf, start);
+               if ((end == NULL) || (end <= start))
+                       break;
+
+               /* plain text in front of the encoded word goes out unchanged */
+               if (start > cursor)
+                       StrBufAppendBufPlain(TmpBuf, cursor, start - cursor, 0);
+
+               /* read-only view onto the rest of the input, beginning at start */
+               memset(&StaticBuf, 0, sizeof StaticBuf);
+               StaticBuf.buf = start;
+               StaticBuf.BufUsed = (*Buf)->BufUsed - (start - (*Buf)->buf);
+               StaticBuf.BufSize = (*Buf)->BufSize - (start - (*Buf)->buf);
+               StaticBuf.ConstBuf = 1;
+               extract_token(charset, start, 1, '?', sizeof charset);
+               extract_token(encoding, start, 2, '?', sizeof encoding);
+               StrBufExtract_token(ConvertBuf, &StaticBuf, 3, '?');
+
+               if (!strcasecmp(encoding, "B")) {       /**< base64 */
+                       ConvertBuf2->BufUsed = CtdlDecodeBase64(ConvertBuf2->buf, 
+                                                               ConvertBuf->buf, 
+                                                               ConvertBuf->BufUsed);
+               }
+               else if (!strcasecmp(encoding, "Q")) {  /**< quoted-printable */
+                       long pos;
+                       
+                       pos = 0;
+                       while (pos < ConvertBuf->BufUsed)
+                       {
+                               if (ConvertBuf->buf[pos] == '_') 
+                                       ConvertBuf->buf[pos] = ' ';
+                               pos++;
+                       }
+
+                       ConvertBuf2->BufUsed = CtdlDecodeQuotedPrintable(
+                               ConvertBuf2->buf, 
+                               ConvertBuf->buf,
+                               ConvertBuf->BufUsed);
+               }
+               else {
+                       /* unknown encoding: take the text as it is; empty the
+                        * scratch buffer first so earlier passes don't pile up */
+                       ConvertBuf2->BufUsed = 0;
+                       ConvertBuf2->buf[0] = '\0';
+                       StrBufAppendBuf(ConvertBuf2, ConvertBuf, 0);
+               }
+
+               ic = ctdl_iconv_open("UTF-8", charset);
+               if (ic != (iconv_t)(-1) ) {
+                       ibuf = ConvertBuf2->buf;
+                       ibuflen = ConvertBuf2->BufUsed;
+                       obuf = ConvertBuf->buf;
+                       /* keep one byte for the terminator written below */
+                       obuflen = ConvertBuf->BufSize - 1;
+
+                       iconv(ic, &ibuf, &ibuflen, &obuf, &obuflen);
+                       ConvertBuf->BufUsed = obuf - ConvertBuf->buf;
+                       ConvertBuf->buf[ConvertBuf->BufUsed] = '\0';
+
+                       StrBufAppendBuf(TmpBuf, ConvertBuf, 0);
+                       iconv_close(ic);
+               }
+               else {
+
+                       StrBufAppendBufPlain(TmpBuf, HKEY("(unreadable)"), 0);
+               }
+
+               /* go on behind the "?=" that closed this word */
+               cursor = end + 2;
+               ++passes;
+       }
+
+       /* whatever follows the last decoded word is plain text */
+       if (cursor < eptr)
+               StrBufAppendBufPlain(TmpBuf, cursor, eptr - cursor, 0);
+
+       FreeStrBuf(&ConvertBuf);
+       FreeStrBuf(&ConvertBuf2);
+       FreeStrBuf(Buf);
+       *Buf = TmpBuf;
+}
+#else
+/* Without iconv there is nothing we can convert; the buffer is left as it is. */
+void StrBuf_RFC822_to_Utf8(StrBuf **Buf, const StrBuf* DefaultCharset) {}
+
+#endif