* Given a source string and a target buffer, returns the string
* properly escaped for insertion into an XML stream. Returns a
* pointer to the target buffer for convenience.
- *
- * BUG: this does not properly handle UTF-8
*/
+static inline int Ctdl_GetUtf8SequenceLength(const char *CharS, const char *CharE)
+{
+ int n = 0;
+ unsigned char test = (1<<7);
+
+ if ((*CharS & 0xC0) != 0xC0)
+ return 1;
+
+ while ((n < 8) &&
+ ((test & ((unsigned char)*CharS)) != 0))
+ {
+ test = test >> 1;
+ n ++;
+ }
+ if ((n > 6) || ((CharE - CharS) < n))
+ n = 0;
+ return n;
+}
+
char *xmlesc(char *buf, char *str, int bufsiz)
{
- char *ptr;
+ int IsUtf8Sequence;
+ char *ptr, *pche;
unsigned char ch;
+ int inlen;
int len = 0;
if (!buf) return(NULL);
if (!str) {
return(buf);
}
+ inlen = strlen(str);
+ pche = str + inlen;
for (ptr=str; *ptr; ptr++) {
ch = *ptr;
buf[len] = 0;
}
else {
- char oct[10];
- sprintf(oct, "&#%o;", ch);
- strcpy(&buf[len], oct);
- len += strlen(oct);
+ IsUtf8Sequence = Ctdl_GetUtf8SequenceLength(ptr, pche);
+ if (IsUtf8Sequence)
+ {
+ while ((IsUtf8Sequence > 0) &&
+ (ptr < pche))
+ {
+ buf[len] = *ptr;
+ ptr ++;
+ --IsUtf8Sequence;
+ }
+ }
+ else
+ {
+ char oct[10];
+ sprintf(oct, "&#%o;", ch);
+ strcpy(&buf[len], oct);
+ len += strlen(oct);
+ }
}
if ((len + 6) > bufsiz) {
return(buf);