fix utf8 handling for xmpp - don't escape valid utf8 characters.
author: Wilfried Goesgens <dothebart@citadel.org>
Sun, 20 Oct 2013 21:29:37 +0000 (23:29 +0200)
committer: Wilfried Goesgens <dothebart@citadel.org>
Sun, 20 Oct 2013 21:29:37 +0000 (23:29 +0200)
citadel/modules/xmpp/serv_xmpp.c

index 7d8fbc67d02c3349ec864d3d56e916563962fc50..457b2bd459dcb66092426561140a725610a640ea 100644 (file)
@@ -81,6 +81,25 @@ static void xmpp_entity_declaration(void *userData, const XML_Char *entityName,
 }
 #endif
 
+/*
+ * Determine the length in bytes of the UTF-8 sequence starting at CharS.
+ *
+ * CharS  first byte of the candidate sequence; must point at a readable byte
+ * CharE  one past the last readable byte of the input
+ *
+ * Returns 1 for a plain 7-bit character, 2..4 for a well-formed multi-byte
+ * sequence that fits completely between CharS and CharE, and 0 if the bytes
+ * at CharS do not form a usable sequence (stray continuation byte, invalid
+ * lead byte, truncated sequence, or a trailing byte that is not a
+ * continuation byte).
+ */
+static inline int XMPP_GetUtf8SequenceLength(const char *CharS, const char *CharE)
+{
+       /* if this is migrated to strbuf, remove this copy. */
+       const unsigned char *seq = (const unsigned char *)CharS;
+       unsigned char test = (1<<7);
+       int n = 0;
+       int i;
+
+       /* high bit clear: plain 7-bit character */
+       if ((*seq & 0x80) == 0)
+               return 1;
+
+       /* the number of leading 1 bits in the lead byte is the sequence length */
+       while ((n < 8) &&
+              ((test & *seq) != 0))
+       {
+               test = test >> 1;
+               n ++;
+       }
+
+       /*
+        * n == 1 is a continuation byte without a lead byte; n > 4 is not
+        * valid UTF-8 (RFC 3629 limits sequences to four bytes).  Also reject
+        * sequences that would run past the end of the input.
+        */
+       if ((n < 2) || (n > 4) || ((CharE - CharS) < n))
+               return 0;
+
+       /*
+        * Every trailing byte must look like 10xxxxxx.  Without this check a
+        * lead byte followed by e.g. '<' would be reported as a valid sequence
+        * and the caller would copy the markup character through unescaped.
+        */
+       for (i = 1; i < n; i++) {
+               if ((seq[i] & 0xC0) != 0x80)
+                       return 0;
+       }
+       return n;
+}
 
 
 /*
@@ -93,8 +112,11 @@ static void xmpp_entity_declaration(void *userData, const XML_Char *entityName,
 char *xmlesc(char *buf, char *str, int bufsiz)
 {
        char *ptr;
+       char *eiptr;
        unsigned char ch;
+       int inlen;
        int len = 0;
+       int IsUtf8Sequence;
 
        if (!buf) return(NULL);
        buf[0] = 0;
@@ -103,6 +125,9 @@ char *xmlesc(char *buf, char *str, int bufsiz)
                return(buf);
        }
 
+       inlen = strlen(str);
+       eiptr = str + inlen;
+
        for (ptr=str; *ptr; ptr++) {
                ch = *ptr;
                if (ch == '<') {
@@ -127,10 +152,25 @@ char *xmlesc(char *buf, char *str, int bufsiz)
                        buf[len] = 0;
                }
                else {
-                       char oct[10];
-                       sprintf(oct, "&#%o;", ch);
-                       strcpy(&buf[len], oct);
-                       len += strlen(oct);
+                       char oct[32];
+
+                       IsUtf8Sequence =  XMPP_GetUtf8SequenceLength(&buf[len], eiptr);
+                       if (IsUtf8Sequence)
+                       {
+                               while (IsUtf8Sequence > 0){
+                                       buf[len] = *ptr;
+                                       len ++;
+                                       if (--IsUtf8Sequence)
+                                               ptr++;
+                               }
+                               buf[len] = '\0';
+                       }
+                       else
+                       {
+                               sprintf(oct, "&#%o;", ch);
+                               strcpy(&buf[len], oct);
+                               len += strlen(oct);
+                       }
                }
                if ((len + 6) > bufsiz) {
                        return(buf);