From 30ad8940ba800c133c0b1cb66973aaacbf68fe5d Mon Sep 17 00:00:00 2001
From: Wilfried Goesgens
Date: Mon, 16 Feb 2015 20:52:59 +0100
Subject: [PATCH] Fix utf8-handling.

The fallback branch of xmlesc() emitted "&#%o;" for the single byte it
was looking at, which the old comment already flagged as not handling
UTF-8 properly. Add Ctdl_GetUtf8SequenceLength() to measure the sequence
that starts at the current byte and copy that sequence to the output
unescaped; the octal escape is kept for bytes the helper rejects.

Review fixes folded in: the copy loop now advances the output index for
every byte, re-terminates the buffer, and leaves ptr on the last byte it
consumed so the enclosing for loop's increment does not skip a character
or step past the terminating NUL.
---
 citadel/modules/xmpp/serv_xmpp.c | 59 ++++++++++++++++++++++++++++++++++-----
 1 file changed, 52 insertions(+), 7 deletions(-)

diff --git a/citadel/modules/xmpp/serv_xmpp.c b/citadel/modules/xmpp/serv_xmpp.c
index e23ede887..64dc8b7aa 100644
--- a/citadel/modules/xmpp/serv_xmpp.c
+++ b/citadel/modules/xmpp/serv_xmpp.c
@@ -87,13 +87,38 @@ static void xmpp_entity_declaration(void *userData, const XML_Char *entityName,
  * Given a source string and a target buffer, returns the string
  * properly escaped for insertion into an XML stream.  Returns a
  * pointer to the target buffer for convenience.
- *
- * BUG: this does not properly handle UTF-8
  */
+static inline int Ctdl_GetUtf8SequenceLength(const char *CharS, const char *CharE)
+{
+	/*
+	 * Length in bytes of the sequence starting at CharS: 1 if the byte
+	 * does not have its two top bits set (not a multi-byte lead byte),
+	 * otherwise the count of its leading 1 bits.  Returns 0 if that
+	 * count exceeds 6 or fewer than that many bytes remain before CharE.
+	 */
+	int n = 0;
+	unsigned char test = (1<<7);
+
+	if ((*CharS & 0xC0) != 0xC0)
+		return 1;
+
+	while ((n < 8) &&
+	       ((test & ((unsigned char)*CharS)) != 0))
+	{
+		test = test >> 1;
+		n ++;
+	}
+	if ((n > 6) || ((CharE - CharS) < n))
+		n = 0;
+	return n;
+}
+
 char *xmlesc(char *buf, char *str, int bufsiz)
 {
-	char *ptr;
+	int IsUtf8Sequence;
+	char *ptr, *pche;
 	unsigned char ch;
+	int inlen;
 	int len = 0;
 
 	if (!buf) return(NULL);
@@ -102,6 +127,8 @@ char *xmlesc(char *buf, char *str, int bufsiz)
 	if (!str) {
 		return(buf);
 	}
+	inlen = strlen(str);
+	pche = str + inlen;
 
 	for (ptr=str; *ptr; ptr++) {
 		ch = *ptr;
@@ -127,10 +154,28 @@ char *xmlesc(char *buf, char *str, int bufsiz)
 			buf[len] = 0;
 		}
 		else {
-			char oct[10];
-			sprintf(oct, "&#%o;", ch);
-			strcpy(&buf[len], oct);
-			len += strlen(oct);
+			IsUtf8Sequence = Ctdl_GetUtf8SequenceLength(ptr, pche);
+			if (IsUtf8Sequence)
+			{
+				/* copy the sequence verbatim, advancing len with every byte */
+				while ((IsUtf8Sequence > 0) &&
+				       (ptr < pche))
+				{
+					buf[len++] = *ptr;
+					ptr ++;
+					--IsUtf8Sequence;
+				}
+				buf[len] = 0;
+				/* step back one: the for loop's ptr++ moves to the next character */
+				ptr --;
+			}
+			else
+			{
+				char oct[10];
+				sprintf(oct, "&#%o;", ch);
+				strcpy(&buf[len], oct);
+				len += strlen(oct);
+			}
 		}
 		if ((len + 6) > bufsiz) {
 			return(buf);
-- 
2.30.2