Fix more edge cases in the UTF-8 handling of StrHtmlEcmaEscAppend()
[citadel.git] / libcitadel / lib / stringbuf.c
index 6d8f549d371d2e00aac73d2e945f004d6564d635..18d9870b5067660242fd5e17e32913402244bfb5 100644 (file)
@@ -276,9 +276,12 @@ static int IncreaseBuf(StrBuf *Buf, int KeepOriginal, int DestSize)
                return -1;
                
        if (DestSize > 0)
-               while (NewSize <= DestSize)
+               while ((NewSize <= DestSize) && (NewSize != 0))
                        NewSize *= 2;
 
+       if (NewSize == 0)
+               return -1;
+
        NewBuf= (char*) malloc(NewSize);
        if (NewBuf == NULL)
                return -1;
@@ -479,9 +482,14 @@ StrBuf* NewStrBufPlain(const char* ptr, int nChars)
        else
                CopySize = nChars;
 
-       while (Siz <= CopySize)
+       while ((Siz <= CopySize) && (Siz != 0))
                Siz *= 2;
 
+       if (Siz == 0)
+       {
+               return NULL;
+       }
+
        NewBuf->buf = (char*) malloc(Siz);
        if (NewBuf->buf == NULL)
        {
@@ -532,9 +540,14 @@ int StrBufPlain(StrBuf *Buf, const char* ptr, int nChars)
        else
                CopySize = nChars;
 
-       while (Siz <= CopySize)
+       while ((Siz <= CopySize) && (Siz != 0))
                Siz *= 2;
 
+       if (Siz == 0) {
+               FlushStrBuf(Buf);
+               return -1;
+       }
+
        if (Siz != Buf->BufSize)
                IncreaseBuf(Buf, 0, Siz);
        memcpy(Buf->buf, ptr, CopySize);
@@ -858,7 +871,8 @@ void StrBufVAppendPrintf(StrBuf *Buf, const char *format, va_list ap)
                va_end(apl);
                newused = Offset + nWritten;
                if (newused >= Buf->BufSize) {
-                       IncreaseBuf(Buf, 1, newused);
+                       if (IncreaseBuf(Buf, 1, newused) == -1)
+                               return; /* TODO: error handling? */
                        newused = Buf->BufSize + 1;
                }
                else {
@@ -899,7 +913,8 @@ void StrBufAppendPrintf(StrBuf *Buf, const char *format, ...)
                va_end(arg_ptr);
                newused = Buf->BufUsed + nWritten;
                if (newused >= Buf->BufSize) {
-                       IncreaseBuf(Buf, 1, newused);
+                       if (IncreaseBuf(Buf, 1, newused) == -1)
+                               return; /* TODO: error handling? */
                        newused = Buf->BufSize + 1;
                }
                else {
@@ -930,7 +945,8 @@ void StrBufPrintf(StrBuf *Buf, const char *format, ...)
                nWritten = vsnprintf(Buf->buf, Buf->BufSize, format, arg_ptr);
                va_end(arg_ptr);
                if (nWritten >= Buf->BufSize) {
-                       IncreaseBuf(Buf, 0, 0);
+                       if (IncreaseBuf(Buf, 0, 0) == -1)
+                               return; /* TODO: error handling? */
                        nWritten = Buf->BufSize + 1;
                        continue;
                }
@@ -1333,7 +1349,7 @@ int StrBufExtract_token(StrBuf *dest, const StrBuf *Source, int parmnum, char se
        //cit_backtrace();
        //lprintf (CTDL_DEBUG, "test >: n: %d sep: %c source: %s \n willi \n", parmnum, separator, source);
 
-       while ((s<e) && !IsEmptyStr(s)) {
+       while ((s < e) && !IsEmptyStr(s)) {
                if (*s == separator) {
                        ++current_token;
                }
@@ -1502,6 +1518,8 @@ int StrBufExtract_NextToken(StrBuf *dest, const StrBuf *Source, const char **pSt
            (Source->BufUsed == 0)      ) 
        {
                *pStart = StrBufNOTNULL;
+               if (dest != NULL)
+                       FlushStrBuf(dest);
                return -1;
        }
         
@@ -1618,7 +1636,7 @@ int StrBufSkip_NTokenS(const StrBuf *Source, const char **pStart, char separator
        //cit_backtrace();
        //lprintf (CTDL_DEBUG, "test >: n: %d sep: %c source: %s \n willi \n", parmnum, separator, source);
 
-       while ((s<EndBuffer) && !IsEmptyStr(s)) {
+       while ((s < EndBuffer) && !IsEmptyStr(s)) {
                if (*s == separator) {
                        ++current_token;
                }
@@ -1785,22 +1803,26 @@ void StrBufUrlescAppend(StrBuf *OutBuf, const StrBuf *In, const char *PlainIn)
  * @param OutBuf the output buffer
  * @param In Buffer to encode
  * @param PlainIn way in from plain old c strings
+ * @param PlainInLen length of PlainIn in bytes, for binary data or when the length is already known; if negative, strlen() is used instead
  */
-void StrBufHexescAppend(StrBuf *OutBuf, const StrBuf *In, const char *PlainIn)
+void StrBufHexEscAppend(StrBuf *OutBuf, const StrBuf *In, const unsigned char *PlainIn, long PlainInLen)
 {
-       const char *pch, *pche;
+       const unsigned char *pch, *pche;
        char *pt, *pte;
        int len;
        
        if (((In == NULL) && (PlainIn == NULL)) || (OutBuf == NULL) )
                return;
        if (PlainIn != NULL) {
-               len = strlen(PlainIn);
+               if (PlainInLen < 0)
+                       len = strlen((const char*)PlainIn);
+               else
+                       len = PlainInLen;
                pch = PlainIn;
                pche = pch + len;
        }
        else {
-               pch = In->buf;
+               pch = (const unsigned char*)In->buf;
                pche = pch + In->BufUsed;
                len = In->BufUsed;
        }
@@ -1818,14 +1840,26 @@ void StrBufHexescAppend(StrBuf *OutBuf, const StrBuf *In, const char *PlainIn)
                        pt = OutBuf->buf + OutBuf->BufUsed;
                }
 
-               *pt = HexList[(unsigned char)*pch][0];
+               *pt = HexList[*pch][0];
                pt ++;
-               *pt = HexList[(unsigned char)*pch][1];
+               *pt = HexList[*pch][1];
                pt ++; pch ++; OutBuf->BufUsed += 2;
        }
        *pt = '\0';
 }
 
+/** 
+ * @ingroup StrBuf_DeEnCoder
+ * @brief append a string in hex encoding to the buffer; thin wrapper around StrBufHexEscAppend() that passes -1 as PlainInLen
+ * @param OutBuf the output buffer
+ * @param In Buffer to encode; used when PlainIn is NULL
+ * @param PlainIn NUL-terminated C string to encode instead of In; its length is determined with strlen()
+ */
+void StrBufHexescAppend(StrBuf *OutBuf, const StrBuf *In, const char *PlainIn)
+{
+       StrBufHexEscAppend(OutBuf, In, (const unsigned char*) PlainIn, -1);
+}
+
 /**
  * @ingroup StrBuf_DeEnCoder
  * @brief Append a string, escaping characters which have meaning in HTML.  
@@ -2188,83 +2222,126 @@ long StrHtmlEcmaEscAppend(StrBuf *Target, const StrBuf *Source, const char *Plai
                        eptr = Target->buf + Target->BufSize - 11; /* our biggest unit to put in...  */
                        bptr = Target->buf + Target->BufUsed;
                }
-               if (*aptr == '<') {
-                       memcpy(bptr, "&lt;", 4);
+               switch (*aptr) {
+               case '<':
+                       memcpy(bptr, HKEY("&lt;"));
                        bptr += 4;
                        Target->BufUsed += 4;
-               }
-               else if (*aptr == '>') {
-                       memcpy(bptr, "&gt;", 4);
+                       break;
+               case '>':
+                       memcpy(bptr, HKEY("&gt;"));
                        bptr += 4;
                        Target->BufUsed += 4;
-               }
-               else if (*aptr == '&') {
-                       memcpy(bptr, "&amp;", 5);
+                       break;
+               case '&':
+                       memcpy(bptr, HKEY("&amp;"));
                        bptr += 5;
                        Target->BufUsed += 5;
-               }
-               else if (*aptr == LB) {
+                       break;
+               case LB:
                        *bptr = '<';
                        bptr ++;
                        Target->BufUsed ++;
-               }
-               else if (*aptr == RB) {
+                       break;
+               case RB:
                        *bptr = '>';
                        bptr ++;
                        Target->BufUsed ++;
-               }
-               else if ((*aptr == 32) && (nbsp == 1)) {
-                       memcpy(bptr, "&nbsp;", 6);
+                       break;
+               case  32:
+//) && (nbsp == 1)) {
+                       memcpy(bptr, HKEY("&nbsp;"));
                        bptr += 6;
                        Target->BufUsed += 6;
-               }
-               else if ((*aptr == '\n') && (nolinebreaks == 1)) {
-                       *bptr='\0';     /* nothing */
-               }
-               else if ((*aptr == '\n') && (nolinebreaks == 2)) {
-                       memcpy(bptr, "&lt;br/&gt;", 11);
-                       bptr += 11;
-                       Target->BufUsed += 11;
-               }
-
-               else if ((*aptr == '\r') && (nolinebreaks != 0)) {
-                       *bptr='\0';     /* nothing */
-               }
-
-               else if ((*aptr == '"') || (*aptr == QU)) {
+                       break;
+               case '\n':
+                       switch (nolinebreaks) {
+                       case 1:
+                               *bptr='\0';     /* nothing */
+                               break;
+                       case 2:
+                               memcpy(bptr, HKEY("&lt;br/&gt;"));
+                               bptr += 11;
+                               Target->BufUsed += 11;
+                               break;
+                       default:
+                               memcpy(bptr, HKEY("\\n"));
+                               bptr += 2;
+                               Target->BufUsed += 2;                           
+                       }
+                       break;
+               case '\r':
+                       switch (nolinebreaks) {
+                       case 1:
+                       case 2:
+                               *bptr='\0';     /* nothing */
+                               break;
+                       default:
+                               memcpy(bptr, HKEY("\\r"));
+                               bptr += 2;
+                               Target->BufUsed += 2;
+                               break;
+                       }
+                       break;
+               case '"':
+               case QU:
                        *bptr = '\\';
                        bptr ++;
                        *bptr = '"';
                        bptr ++;
                        Target->BufUsed += 2;
-               } else if (*aptr == '\\') {
+                       break;
+               case '\\':
+                       if ((*(aptr + 1) == 'u') &&
+                           isxdigit(*(aptr + 2)) &&
+                           isxdigit(*(aptr + 3)) &&
+                           isxdigit(*(aptr + 4)) &&
+                           isxdigit(*(aptr + 5)))
+                       { /* oh, a unicode escaper. let it pass through. */
+                               memcpy(bptr, aptr, 6);
+                               aptr += 5;
+                               bptr +=6;
+                               Target->BufUsed += 6;
+                       }
+                       else 
+                       {
+                               *bptr = '\\';
+                               bptr ++;
+                               *bptr = '\\';
+                               bptr ++;
+                               Target->BufUsed += 2;
+                       }
+                       break;
+               case '\b':
                        *bptr = '\\';
                        bptr ++;
+                       *bptr = 'b';
+                       bptr ++;
+                       Target->BufUsed += 2;
+                       break;
+               case '\f':
                        *bptr = '\\';
                        bptr ++;
+                       *bptr = 'f';
+                       bptr ++;
                        Target->BufUsed += 2;
-               }
-               else {
-                       if (((unsigned char)*aptr) >= 0x20)
-                       {
-                               IsUtf8Sequence =  Ctdl_GetUtf8SequenceLength(aptr, eiptr);
-                               
+                       break;
+               case '\t':
+                       *bptr = '\\';
+                       bptr ++;
+                       *bptr = 't';
+                       bptr ++;
+                       Target->BufUsed += 2;
+                       break;
+               default:
+                       IsUtf8Sequence =  Ctdl_GetUtf8SequenceLength(aptr, eiptr);
+                       while (IsUtf8Sequence > 0){
                                *bptr = *aptr;
                                Target->BufUsed ++;
-                               while (IsUtf8Sequence > 1){
-                                       if(bptr + IsUtf8Sequence >= eptr) {
-                                               IncreaseBuf(Target, 1, -1);
-                                               eptr = Target->buf + Target->BufSize - 11; /* our biggest unit to put in...  */
-                                               bptr = Target->buf + Target->BufUsed - 1;
-                                       }
-                                       bptr++; aptr++;
-                                       IsUtf8Sequence --;
-                                       *bptr = *aptr;
-                                       Target->BufUsed ++;
-                               }
+                               if (--IsUtf8Sequence)
+                                       aptr++;
                                bptr++;
                        }
-
                }
                aptr ++;
        }
@@ -2529,7 +2606,13 @@ int StrBufRFC2047encode(StrBuf **target, const StrBuf *source)
                if ((*target)->BufUsed + 4 >= (*target)->BufSize)
                        IncreaseBuf(*target, 1, 0);
                ch = (unsigned char) source->buf[i];
-               if ((ch < 32) || (ch > 126) || (ch == 61)) {
+               if ((ch  <  32) || 
+                   (ch  > 126) || 
+                   (ch ==  61) ||
+                   (ch == '=') ||
+                   (ch == '[') ||
+                   (ch == ']')   )
+               {
                        sprintf(&(*target)->buf[(*target)->BufUsed], "=%02X", ch);
                        (*target)->BufUsed += 3;
                }
@@ -2588,15 +2671,12 @@ StrBuf *StrBufSanitizeEmailRecipientVector(const StrBuf *Recp,
                                           StrBuf *EncBuf)
 {
        StrBuf *Target;
-       int need_to_encode;
-
        const char *pch, *pche;
        const char *UserStart, *UserEnd, *EmailStart, *EmailEnd, *At;
 
        if ((Recp == NULL) || (StrLength(Recp) == 0))
                return NULL;
 
-       need_to_encode = 0;
        pch = ChrPtr(Recp);
        pche = pch + StrLength(Recp);
 
@@ -2607,8 +2687,6 @@ StrBuf *StrBufSanitizeEmailRecipientVector(const StrBuf *Recp,
 
        while ((pch != NULL) && (pch < pche))
        {
-               int ColonOk = 0;
-
                while (isspace(*pch)) pch++;
                UserStart = UserEnd = EmailStart = EmailEnd = NULL;
                
@@ -2638,7 +2716,6 @@ StrBuf *StrBufSanitizeEmailRecipientVector(const StrBuf *Recp,
                        if (EmailEnd == NULL)
                                EmailEnd = pche;
                        pch = EmailEnd + 1;
-                       ColonOk = 1;
                }
                else {
                        int gt = 0;
@@ -2746,7 +2823,7 @@ void StrBufReplaceChars(StrBuf *buf, char search, char replace)
 
 /**
  * @ingroup StrBuf
- * @brief removes all \r s from the string, or replaces them with \n if its not a combination of both.
+ * @brief removes all \\r characters from the string, or replaces them with \\n if they are not part of a combination of both.
  * @param buf Buffer to modify
  */
 void StrBufToUnixLF(StrBuf *buf)
@@ -2828,7 +2905,8 @@ static inline const char *FindNextEnd (const StrBuf *Buf, const char *bptr)
                return NULL;
 
        if ((Buf->BufUsed - (end - Buf->buf) > 3) &&
-           ((*(end + 1) == 'B') || (*(end + 1) == 'Q')) && 
+           (((*(end + 1) == 'B') || (*(end + 1) == 'Q')) ||
+            ((*(end + 1) == 'b') || (*(end + 1) == 'q'))) && 
            (*(end + 2) == '?')) {
                /* skip on to the end of the cluster, the next ?= */
                end = strstr(end + 3, "?=");
@@ -3081,7 +3159,7 @@ void StrBuf_RFC822_2_Utf8(StrBuf *Target,
        start = strstr(DecodeMee->buf, "=?");
        eptr = DecodeMee->buf + DecodeMee->BufUsed;
        if (start != NULL) 
-               end = FindNextEnd (DecodeMee, start);
+               end = FindNextEnd (DecodeMee, start + 2);
        else {
                StrBufAppendBuf(Target, DecodeMee, 0);
                FreeStrBuf(&DecodedInvalidBuf);
@@ -3177,19 +3255,22 @@ void StrBuf_RFC822_2_Utf8(StrBuf *Target,
  * @ingroup StrBuf
  * @brief evaluate the length of an utf8 special character sequence
  * @param Char the character to examine
- * @returns width of utf8 chars in bytes
+ * @returns width of the UTF-8 character in bytes; 0 if the sequence is broken; 1 if it is plain ASCII.
  */
 static inline int Ctdl_GetUtf8SequenceLength(const char *CharS, const char *CharE)
 {
        int n = 1;
         char test = (1<<7);
-       
+       /* bytes with neither of the two top bits set (values below 0x40) are reported as single-byte characters */
+       if ((*CharS & 0xC0) == 0) 
+               return 1;
+       /* NOTE(review): 'test' is shifted left below, so with an 8-bit char the mask is 0 after one pass and any byte with the top bit set yields n == 2; a right shift was presumably intended - TODO confirm */
        while ((n < 8) && ((test & *CharS) != 0)) {
                test = test << 1;
                n ++;
        }
        if ((n > 6) || ((CharE - CharS) < n))
-               n = 1;
+               n = 0; /* over-long lead byte, or fewer than n bytes left before CharE: report a broken sequence */
        return n;
 }
 
@@ -3400,18 +3481,27 @@ long StrBuf_read_one_chunk_callback (int fd, short event, IOBuffer *FB)
 {
        long bufremain = 0;
        int n;
+       
+       if ((FB == NULL) || (FB->Buf == NULL))
+               return -1;
 
        /*
         * check whether the read pointer is somewhere in a range 
         * where a cut left is inexpensive
         */
+
        if (FB->ReadWritePointer != NULL)
        {
-               long already_read = FB->ReadWritePointer - FB->Buf->buf;
-               bufremain = FB->Buf->BufSize - FB->Buf->BufUsed;
+               long already_read;
+               
+               already_read = FB->ReadWritePointer - FB->Buf->buf;
+               bufremain = FB->Buf->BufSize - FB->Buf->BufUsed - 1;
 
                if (already_read != 0) {
-                       long unread = FB->Buf->BufUsed - already_read;
+                       long unread;
+                       
+                       unread = FB->Buf->BufUsed - already_read;
+
                        /* else nothing to compact... */
                        if (unread == 0) {
                                FB->ReadWritePointer = FB->Buf->buf;
@@ -3430,22 +3520,35 @@ long StrBuf_read_one_chunk_callback (int fd, short event, IOBuffer *FB)
                                else
                                        memmove(FB->Buf->buf, FB->ReadWritePointer, unread);
                                FB->ReadWritePointer = FB->Buf->buf;
-                               bufremain = FB->Buf->BufSize - unread;
+                               bufremain = FB->Buf->BufSize - unread - 1;
                        }
-                       else if (bufremain < (FB->Buf->BufSize / 10)) {
-                               /* get a bigger buffer */ ///TODO: special increase function that won't copy the already read!
-                               IncreaseBuf(FB->Buf, 0, -1);
+                       else if (bufremain < (FB->Buf->BufSize / 10))
+                       {
+                               /* get a bigger buffer */ 
+
+                               IncreaseBuf(FB->Buf, 0, FB->Buf->BufUsed + 1);
+
                                FB->ReadWritePointer = FB->Buf->buf + unread;
-                               bufremain = FB->Buf->BufSize - unread;
+
+                               bufremain = FB->Buf->BufSize - unread - 1;
+/*TODO: special increase function that won't copy the already read! */
                        }
                }
+               else if (bufremain < 10) {
+                       IncreaseBuf(FB->Buf, 1, FB->Buf->BufUsed + 10);
+                       
+                       FB->ReadWritePointer = FB->Buf->buf;
+                       
+                       bufremain = FB->Buf->BufSize - FB->Buf->BufUsed - 1;
+               }
                
        }
        else {
                FB->ReadWritePointer = FB->Buf->buf;
-               bufremain = FB->Buf->BufSize;
+               bufremain = FB->Buf->BufSize - 1;
        }
-       n = read(fd, FB->Buf->buf + FB->Buf->BufUsed, bufremain - 1);
+
+       n = read(fd, FB->Buf->buf + FB->Buf->BufUsed, bufremain);
 
        if (n > 0) {
                FB->Buf->BufUsed += n;
@@ -3459,6 +3562,9 @@ int StrBuf_write_one_chunk_callback(int fd, short event, IOBuffer *FB)
        long WriteRemain;
        int n;
 
+       if ((FB == NULL) || (FB->Buf == NULL))
+               return -1;
+
        if (FB->ReadWritePointer != NULL)
        {
                WriteRemain = FB->Buf->BufUsed - 
@@ -3931,7 +4037,7 @@ static const char *ErrRBLF_BLOBPreConditionFailed="StrBufReadBLOB: Wrong argumen
 int StrBufReadBLOB(StrBuf *Buf, int *fd, int append, long nBytes, const char **Error)
 {
        int fdflags;
-       int len, rlen, slen;
+       int rlen;
        int nSuccessLess;
        int nRead = 0;
        char *ptr;
@@ -3951,8 +4057,6 @@ int StrBufReadBLOB(StrBuf *Buf, int *fd, int append, long nBytes, const char **E
 
        ptr = Buf->buf + Buf->BufUsed;
 
-       slen = len = Buf->BufUsed;
-
        fdflags = fcntl(*fd, F_GETFL);
        IsNonBlock = (fdflags & O_NONBLOCK) == O_NONBLOCK;
        nSuccessLess = 0;
@@ -4021,17 +4125,15 @@ int StrBufReadBLOBBuffered(StrBuf *Blob,
                           int check, 
                           const char **Error)
 {
-       const char *pche;
        const char *pos;
        int fdflags;
        int len = 0;
-       int rlen, slen;
+       int rlen;
        int nRead = 0;
        int nAlreadyRead = 0;
        int IsNonBlock;
        char *ptr;
        fd_set rfds;
-       const char *pch;
        struct timeval tv;
        int nSuccessLess = 0;
        int MaxTries;
@@ -4060,9 +4162,6 @@ int StrBufReadBLOBBuffered(StrBuf *Blob,
            (pos != NULL) && 
            (pos < IOBuf->buf + IOBuf->BufUsed)) 
        {
-               pche = IOBuf->buf + IOBuf->BufUsed;
-               pch = pos;
-
                if (rlen < nBytes) {
                        memcpy(Blob->buf + Blob->BufUsed, pos, rlen);
                        Blob->BufUsed += rlen;
@@ -4090,7 +4189,7 @@ int StrBufReadBLOBBuffered(StrBuf *Blob,
                IncreaseBuf(IOBuf, 0, nBytes - nRead);
        ptr = IOBuf->buf;
 
-       slen = len = Blob->BufUsed;
+       len = Blob->BufUsed;
 
        fdflags = fcntl(*fd, F_GETFL);
        IsNonBlock = (fdflags & O_NONBLOCK) == O_NONBLOCK;
@@ -4181,9 +4280,9 @@ int StrBufReadBLOBBuffered(StrBuf *Blob,
  * @param Buf BLOB with lines of text...
  * @param Ptr moved arround to keep the next-line across several iterations
  *        has to be &NULL on start; will be &NotNULL on end of buffer
- * @returns size of copied buffer
+ * @returns size of remaining buffer
  */
-int StrBufSipLine(StrBuf *LineBuf, StrBuf *Buf, const char **Ptr)
+int StrBufSipLine(StrBuf *LineBuf, const StrBuf *Buf, const char **Ptr)
 {
        const char *aptr, *ptr, *eptr;
        char *optr, *xptr;