Fix detection of UTF-8 sequences
[citadel.git] / libcitadel / lib / stringbuf.c
index 9318ba66ca3a77fbd2fe91e0d80eb0eccb65977f..2e91d50f1b998c7e0a1cfdc3c97d5ec1e72b966f 100644 (file)
@@ -2144,31 +2144,83 @@ long StrECMAEscAppend(StrBuf *Target, const StrBuf *Source, const char *PlainIn)
                return -1;
 
        bptr = Target->buf + Target->BufUsed;
-       eptr = Target->buf + Target->BufSize - 3; /* our biggest unit to put in...  */
+       eptr = Target->buf + Target->BufSize - 7; /* our biggest unit to put in...  */
 
        while (aptr < eiptr){
                if(bptr >= eptr) {
                        IncreaseBuf(Target, 1, -1);
-                       eptr = Target->buf + Target->BufSize - 3; 
+                       eptr = Target->buf + Target->BufSize - 7; /* our biggest unit to put in...  */
                        bptr = Target->buf + Target->BufUsed;
                }
-               if (*aptr == '"') {
+               switch (*aptr) {
+               case '\n':
+                       memcpy(bptr, HKEY("\\n"));
+                       bptr += 2;
+                       Target->BufUsed += 2;                           
+                       break;
+               case '\r':
+                       memcpy(bptr, HKEY("\\r"));
+                       bptr += 2;
+                       Target->BufUsed += 2;
+                       break;
+               case '"':
                        *bptr = '\\';
                        bptr ++;
                        *bptr = '"';
                        bptr ++;
                        Target->BufUsed += 2;
-               } else if (*aptr == '\\') {
+                       break;
+               case '\\':
+                       if ((*(aptr + 1) == 'u') &&
+                           isxdigit(*(aptr + 2)) &&
+                           isxdigit(*(aptr + 3)) &&
+                           isxdigit(*(aptr + 4)) &&
+                           isxdigit(*(aptr + 5)))
+                       { /* oh, a unicode escaper. let it pass through. */
+                               memcpy(bptr, aptr, 6);
+                               aptr += 5;
+                               bptr +=6;
+                               Target->BufUsed += 6;
+                       }
+                       else 
+                       {
+                               *bptr = '\\';
+                               bptr ++;
+                               *bptr = '\\';
+                               bptr ++;
+                               Target->BufUsed += 2;
+                       }
+                       break;
+               case '\b':
                        *bptr = '\\';
                        bptr ++;
+                       *bptr = 'b';
+                       bptr ++;
+                       Target->BufUsed += 2;
+                       break;
+               case '\f':
                        *bptr = '\\';
                        bptr ++;
+                       *bptr = 'f';
+                       bptr ++;
                        Target->BufUsed += 2;
-               }
-               else{
-                       *bptr = *aptr;
-                       bptr++;
-                       Target->BufUsed ++;
+                       break;
+               case '\t':
+                       *bptr = '\\';
+                       bptr ++;
+                       *bptr = 't';
+                       bptr ++;
+                       Target->BufUsed += 2;
+                       break;
+               default:
+                       IsUtf8Sequence =  Ctdl_GetUtf8SequenceLength(aptr, eiptr);
+                       while (IsUtf8Sequence > 0){
+                               *bptr = *aptr;
+                               Target->BufUsed ++;
+                               if (--IsUtf8Sequence)
+                                       aptr++;
+                               bptr++;
+                       }
                }
                aptr ++;
        }
@@ -2222,83 +2274,127 @@ long StrHtmlEcmaEscAppend(StrBuf *Target, const StrBuf *Source, const char *Plai
                        eptr = Target->buf + Target->BufSize - 11; /* our biggest unit to put in...  */
                        bptr = Target->buf + Target->BufUsed;
                }
-               if (*aptr == '<') {
-                       memcpy(bptr, "&lt;", 4);
+               switch (*aptr) {
+               case '<':
+                       memcpy(bptr, HKEY("&lt;"));
                        bptr += 4;
                        Target->BufUsed += 4;
-               }
-               else if (*aptr == '>') {
-                       memcpy(bptr, "&gt;", 4);
+                       break;
+               case '>':
+                       memcpy(bptr, HKEY("&gt;"));
                        bptr += 4;
                        Target->BufUsed += 4;
-               }
-               else if (*aptr == '&') {
-                       memcpy(bptr, "&amp;", 5);
+                       break;
+               case '&':
+                       memcpy(bptr, HKEY("&amp;"));
                        bptr += 5;
                        Target->BufUsed += 5;
-               }
-               else if (*aptr == LB) {
+                       break;
+               case LB:
                        *bptr = '<';
                        bptr ++;
                        Target->BufUsed ++;
-               }
-               else if (*aptr == RB) {
+                       break;
+               case RB:
                        *bptr = '>';
                        bptr ++;
                        Target->BufUsed ++;
-               }
-               else if ((*aptr == 32) && (nbsp == 1)) {
-                       memcpy(bptr, "&nbsp;", 6);
-                       bptr += 6;
-                       Target->BufUsed += 6;
-               }
-               else if ((*aptr == '\n') && (nolinebreaks == 1)) {
-                       *bptr='\0';     /* nothing */
-               }
-               else if ((*aptr == '\n') && (nolinebreaks == 2)) {
-                       memcpy(bptr, "&lt;br/&gt;", 11);
-                       bptr += 11;
-                       Target->BufUsed += 11;
-               }
-
-               else if ((*aptr == '\r') && (nolinebreaks != 0)) {
-                       *bptr='\0';     /* nothing */
-               }
-
-               else if ((*aptr == '"') || (*aptr == QU)) {
+                       break;
+               case '\n':
+                       switch (nolinebreaks) {
+                       case 1:
+                               *bptr='\0';     /* nothing */
+                               break;
+                       case 2:
+                               memcpy(bptr, HKEY("&lt;br/&gt;"));
+                               bptr += 11;
+                               Target->BufUsed += 11;
+                               break;
+                       default:
+                               memcpy(bptr, HKEY("\\n"));
+                               bptr += 2;
+                               Target->BufUsed += 2;                           
+                       }
+                       break;
+               case '\r':
+                       switch (nolinebreaks) {
+                       case 1:
+                       case 2:
+                               *bptr='\0';     /* nothing */
+                               break;
+                       default:
+                               memcpy(bptr, HKEY("\\r"));
+                               bptr += 2;
+                               Target->BufUsed += 2;
+                               break;
+                       }
+                       break;
+               case '"':
+               case QU:
                        *bptr = '\\';
                        bptr ++;
                        *bptr = '"';
                        bptr ++;
                        Target->BufUsed += 2;
-               } else if (*aptr == '\\') {
+                       break;
+               case '\\':
+                       if ((*(aptr + 1) == 'u') &&
+                           isxdigit(*(aptr + 2)) &&
+                           isxdigit(*(aptr + 3)) &&
+                           isxdigit(*(aptr + 4)) &&
+                           isxdigit(*(aptr + 5)))
+                       { /* oh, a unicode escaper. let it pass through. */
+                               memcpy(bptr, aptr, 6);
+                               aptr += 5;
+                               bptr +=6;
+                               Target->BufUsed += 6;
+                       }
+                       else 
+                       {
+                               *bptr = '\\';
+                               bptr ++;
+                               *bptr = '\\';
+                               bptr ++;
+                               Target->BufUsed += 2;
+                       }
+                       break;
+               case '\b':
                        *bptr = '\\';
                        bptr ++;
+                       *bptr = 'b';
+                       bptr ++;
+                       Target->BufUsed += 2;
+                       break;
+               case '\f':
                        *bptr = '\\';
                        bptr ++;
+                       *bptr = 'f';
+                       bptr ++;
                        Target->BufUsed += 2;
-               }
-               else {
-                       if (((unsigned char)*aptr) >= 0x20)
-                       {
-                               IsUtf8Sequence =  Ctdl_GetUtf8SequenceLength(aptr, eiptr);
-                               
+                       break;
+               case '\t':
+                       *bptr = '\\';
+                       bptr ++;
+                       *bptr = 't';
+                       bptr ++;
+                       Target->BufUsed += 2;
+                       break;
+               case  32:
+                       if (nbsp == 1) {
+                               memcpy(bptr, HKEY("&nbsp;"));
+                               bptr += 6;
+                               Target->BufUsed += 6;
+                               break;
+                       }
+               default:
+                       IsUtf8Sequence =  Ctdl_GetUtf8SequenceLength(aptr, eiptr);
+                       while (IsUtf8Sequence > 0){
                                *bptr = *aptr;
                                Target->BufUsed ++;
-                               while (IsUtf8Sequence > 1){
-                                       if(bptr + IsUtf8Sequence >= eptr) {
-                                               IncreaseBuf(Target, 1, -1);
-                                               eptr = Target->buf + Target->BufSize - 11; /* our biggest unit to put in...  */
-                                               bptr = Target->buf + Target->BufUsed - 1;
-                                       }
-                                       bptr++; aptr++;
-                                       IsUtf8Sequence --;
-                                       *bptr = *aptr;
-                                       Target->BufUsed ++;
-                               }
+                               if (--IsUtf8Sequence)
+                                       aptr++;
                                bptr++;
                        }
-
                }
                aptr ++;
        }
@@ -2563,7 +2659,13 @@ int StrBufRFC2047encode(StrBuf **target, const StrBuf *source)
                if ((*target)->BufUsed + 4 >= (*target)->BufSize)
                        IncreaseBuf(*target, 1, 0);
                ch = (unsigned char) source->buf[i];
-               if ((ch < 32) || (ch > 126) || (ch == 61)) {
+               if ((ch  <  32) || 
+                   (ch  > 126) || 
+                   (ch ==  61) ||
+                   (ch == '=') ||
+                   (ch == '[') ||
+                   (ch == ']')   )
+               {
                        sprintf(&(*target)->buf[(*target)->BufUsed], "=%02X", ch);
                        (*target)->BufUsed += 3;
                }
@@ -2622,15 +2724,12 @@ StrBuf *StrBufSanitizeEmailRecipientVector(const StrBuf *Recp,
                                           StrBuf *EncBuf)
 {
        StrBuf *Target;
-       int need_to_encode;
-
        const char *pch, *pche;
        const char *UserStart, *UserEnd, *EmailStart, *EmailEnd, *At;
 
        if ((Recp == NULL) || (StrLength(Recp) == 0))
                return NULL;
 
-       need_to_encode = 0;
        pch = ChrPtr(Recp);
        pche = pch + StrLength(Recp);
 
@@ -2641,8 +2740,6 @@ StrBuf *StrBufSanitizeEmailRecipientVector(const StrBuf *Recp,
 
        while ((pch != NULL) && (pch < pche))
        {
-               int ColonOk = 0;
-
                while (isspace(*pch)) pch++;
                UserStart = UserEnd = EmailStart = EmailEnd = NULL;
                
@@ -2672,7 +2769,6 @@ StrBuf *StrBufSanitizeEmailRecipientVector(const StrBuf *Recp,
                        if (EmailEnd == NULL)
                                EmailEnd = pche;
                        pch = EmailEnd + 1;
-                       ColonOk = 1;
                }
                else {
                        int gt = 0;
@@ -2780,7 +2876,7 @@ void StrBufReplaceChars(StrBuf *buf, char search, char replace)
 
 /**
  * @ingroup StrBuf
- * @brief removes all \r s from the string, or replaces them with \n if its not a combination of both.
+ * @brief removes all \\r characters from the string, or replaces them with \\n if they are not part of a combination of both.
  * @param buf Buffer to modify
  */
 void StrBufToUnixLF(StrBuf *buf)
@@ -3212,19 +3308,24 @@ void StrBuf_RFC822_2_Utf8(StrBuf *Target,
  * @ingroup StrBuf
  * @brief evaluate the length of an utf8 special character sequence
  * @param Char the character to examine
- * @returns width of utf8 chars in bytes
+ * @returns width of the UTF-8 sequence in bytes; 1 if the byte does not start a multi-byte sequence (e.g. plain ASCII); 0 if the sequence is broken (longer than 6 bytes or running past CharE).
  */
 static inline int Ctdl_GetUtf8SequenceLength(const char *CharS, const char *CharE)
 {
-       int n = 1;
-        char test = (1<<7);
-       
-       while ((n < 8) && ((test & *CharS) != 0)) {
-               test = test << 1;
+       int n = 0;
+        unsigned char test = (1<<7);
+
+       if ((*CharS & 0xC0) != 0xC0) 
+               return 1;
+
+       while ((n < 8) && 
+              ((test & ((unsigned char)*CharS)) != 0)) 
+       {
+               test = test >> 1;
                n ++;
        }
        if ((n > 6) || ((CharE - CharS) < n))
-               n = 1;
+               n = 0;
        return n;
 }
 
@@ -3237,7 +3338,7 @@ static inline int Ctdl_GetUtf8SequenceLength(const char *CharS, const char *Char
 static inline int Ctdl_IsUtf8SequenceStart(const char Char)
 {
 /** 11??.???? indicates an UTF8 Sequence. */
-       return ((Char & 0xC0) != 0);
+       return ((Char & 0xC0) == 0xC0);
 }
 
 /**
@@ -3991,7 +4092,7 @@ static const char *ErrRBLF_BLOBPreConditionFailed="StrBufReadBLOB: Wrong argumen
 int StrBufReadBLOB(StrBuf *Buf, int *fd, int append, long nBytes, const char **Error)
 {
        int fdflags;
-       int len, rlen, slen;
+       int rlen;
        int nSuccessLess;
        int nRead = 0;
        char *ptr;
@@ -4011,8 +4112,6 @@ int StrBufReadBLOB(StrBuf *Buf, int *fd, int append, long nBytes, const char **E
 
        ptr = Buf->buf + Buf->BufUsed;
 
-       slen = len = Buf->BufUsed;
-
        fdflags = fcntl(*fd, F_GETFL);
        IsNonBlock = (fdflags & O_NONBLOCK) == O_NONBLOCK;
        nSuccessLess = 0;
@@ -4081,17 +4180,15 @@ int StrBufReadBLOBBuffered(StrBuf *Blob,
                           int check, 
                           const char **Error)
 {
-       const char *pche;
        const char *pos;
        int fdflags;
        int len = 0;
-       int rlen, slen;
+       int rlen;
        int nRead = 0;
        int nAlreadyRead = 0;
        int IsNonBlock;
        char *ptr;
        fd_set rfds;
-       const char *pch;
        struct timeval tv;
        int nSuccessLess = 0;
        int MaxTries;
@@ -4120,9 +4217,6 @@ int StrBufReadBLOBBuffered(StrBuf *Blob,
            (pos != NULL) && 
            (pos < IOBuf->buf + IOBuf->BufUsed)) 
        {
-               pche = IOBuf->buf + IOBuf->BufUsed;
-               pch = pos;
-
                if (rlen < nBytes) {
                        memcpy(Blob->buf + Blob->BufUsed, pos, rlen);
                        Blob->BufUsed += rlen;
@@ -4150,7 +4244,7 @@ int StrBufReadBLOBBuffered(StrBuf *Blob,
                IncreaseBuf(IOBuf, 0, nBytes - nRead);
        ptr = IOBuf->buf;
 
-       slen = len = Blob->BufUsed;
+       len = Blob->BufUsed;
 
        fdflags = fcntl(*fd, F_GETFL);
        IsNonBlock = (fdflags & O_NONBLOCK) == O_NONBLOCK;
@@ -4243,7 +4337,7 @@ int StrBufReadBLOBBuffered(StrBuf *Blob,
  *        has to be &NULL on start; will be &NotNULL on end of buffer
  * @returns size of remaining buffer
  */
-int StrBufSipLine(StrBuf *LineBuf, StrBuf *Buf, const char **Ptr)
+int StrBufSipLine(StrBuf *LineBuf, const StrBuf *Buf, const char **Ptr)
 {
        const char *aptr, *ptr, *eptr;
        char *optr, *xptr;