code.citadel.org Git - citadel.git/blobdiff - libcitadel/lib/stringbuf.c
Fix detecting of UTF8 Sequences
[citadel.git] / libcitadel / lib / stringbuf.c
index 18d9870b5067660242fd5e17e32913402244bfb5..2e91d50f1b998c7e0a1cfdc3c97d5ec1e72b966f 100644 (file)
@@ -2144,31 +2144,83 @@ long StrECMAEscAppend(StrBuf *Target, const StrBuf *Source, const char *PlainIn)
                return -1;
 
        bptr = Target->buf + Target->BufUsed;
-       eptr = Target->buf + Target->BufSize - 3; /* our biggest unit to put in...  */
+       eptr = Target->buf + Target->BufSize - 7; /* our biggest unit to put in...  */
 
        while (aptr < eiptr){
                if(bptr >= eptr) {
                        IncreaseBuf(Target, 1, -1);
-                       eptr = Target->buf + Target->BufSize - 3; 
+                       eptr = Target->buf + Target->BufSize - 7; /* our biggest unit to put in...  */
                        bptr = Target->buf + Target->BufUsed;
                }
-               if (*aptr == '"') {
+               switch (*aptr) {
+               case '\n':
+                       memcpy(bptr, HKEY("\\n"));
+                       bptr += 2;
+                       Target->BufUsed += 2;                           
+                       break;
+               case '\r':
+                       memcpy(bptr, HKEY("\\r"));
+                       bptr += 2;
+                       Target->BufUsed += 2;
+                       break;
+               case '"':
                        *bptr = '\\';
                        bptr ++;
                        *bptr = '"';
                        bptr ++;
                        Target->BufUsed += 2;
-               } else if (*aptr == '\\') {
+                       break;
+               case '\\':
+                       if ((*(aptr + 1) == 'u') &&
+                           isxdigit(*(aptr + 2)) &&
+                           isxdigit(*(aptr + 3)) &&
+                           isxdigit(*(aptr + 4)) &&
+                           isxdigit(*(aptr + 5)))
+                       { /* oh, a unicode escaper. let it pass through. */
+                               memcpy(bptr, aptr, 6);
+                               aptr += 5;
+                               bptr +=6;
+                               Target->BufUsed += 6;
+                       }
+                       else 
+                       {
+                               *bptr = '\\';
+                               bptr ++;
+                               *bptr = '\\';
+                               bptr ++;
+                               Target->BufUsed += 2;
+                       }
+                       break;
+               case '\b':
                        *bptr = '\\';
                        bptr ++;
+                       *bptr = 'b';
+                       bptr ++;
+                       Target->BufUsed += 2;
+                       break;
+               case '\f':
                        *bptr = '\\';
                        bptr ++;
+                       *bptr = 'f';
+                       bptr ++;
                        Target->BufUsed += 2;
-               }
-               else{
-                       *bptr = *aptr;
-                       bptr++;
-                       Target->BufUsed ++;
+                       break;
+               case '\t':
+                       *bptr = '\\';
+                       bptr ++;
+                       *bptr = 't';
+                       bptr ++;
+                       Target->BufUsed += 2;
+                       break;
+               default:
+                       IsUtf8Sequence =  Ctdl_GetUtf8SequenceLength(aptr, eiptr);
+                       while (IsUtf8Sequence > 0){
+                               *bptr = *aptr;
+                               Target->BufUsed ++;
+                               if (--IsUtf8Sequence)
+                                       aptr++;
+                               bptr++;
+                       }
                }
                aptr ++;
        }
@@ -2248,12 +2300,6 @@ long StrHtmlEcmaEscAppend(StrBuf *Target, const StrBuf *Source, const char *Plai
                        bptr ++;
                        Target->BufUsed ++;
                        break;
-               case  32:
-//) && (nbsp == 1)) {
-                       memcpy(bptr, HKEY("&nbsp;"));
-                       bptr += 6;
-                       Target->BufUsed += 6;
-                       break;
                case '\n':
                        switch (nolinebreaks) {
                        case 1:
@@ -2333,6 +2379,13 @@ long StrHtmlEcmaEscAppend(StrBuf *Target, const StrBuf *Source, const char *Plai
                        bptr ++;
                        Target->BufUsed += 2;
                        break;
+               case  32:
+                       if (nbsp == 1) {
+                               memcpy(bptr, HKEY("&nbsp;"));
+                               bptr += 6;
+                               Target->BufUsed += 6;
+                               break;
+                       }
                default:
                        IsUtf8Sequence =  Ctdl_GetUtf8SequenceLength(aptr, eiptr);
                        while (IsUtf8Sequence > 0){
@@ -3259,14 +3312,16 @@ void StrBuf_RFC822_2_Utf8(StrBuf *Target,
  */
 static inline int Ctdl_GetUtf8SequenceLength(const char *CharS, const char *CharE)
 {
-       int n = 1;
-        char test = (1<<7);
+       int n = 0;
+        unsigned char test = (1<<7);
 
-       if ((*CharS & 0xC0) == 0) 
+       if ((*CharS & 0xC0) != 0xC0) 
                return 1;
 
-       while ((n < 8) && ((test & *CharS) != 0)) {
-               test = test << 1;
+       while ((n < 8) && 
+              ((test & ((unsigned char)*CharS)) != 0)) 
+       {
+               test = test >> 1;
                n ++;
        }
        if ((n > 6) || ((CharE - CharS) < n))
@@ -3283,7 +3338,7 @@ static inline int Ctdl_GetUtf8SequenceLength(const char *CharS, const char *Char
 static inline int Ctdl_IsUtf8SequenceStart(const char Char)
 {
 /** A byte matching 11xxxxxx (both top bits set) is treated as the start of a multi-byte UTF-8 sequence. */
-       return ((Char & 0xC0) != 0);
+       return ((Char & 0xC0) == 0xC0);
 }
 
 /**