From: Wilfried Göesgens Date: Sun, 7 Jun 2009 14:06:50 +0000 (+0000) Subject: * move utf8 handling stuff into strbuf, so we can be more exact about our buffer... X-Git-Tag: v7.86~1098 X-Git-Url: https://code.citadel.org/?p=citadel.git;a=commitdiff_plain;h=33ac659dedc56d89451f53e84fd90927fd88bbf7 * move utf8 handling stuff into strbuf, so we can be more exact about our buffer lengths... * some more shuffling --- diff --git a/libcitadel/lib/libcitadel.h b/libcitadel/lib/libcitadel.h index 132c49f09..d9e3ffaf6 100644 --- a/libcitadel/lib/libcitadel.h +++ b/libcitadel/lib/libcitadel.h @@ -334,11 +334,6 @@ const char *GetIconFilename(char *MimeType, size_t len); /* tools */ -int Ctdl_IsUtf8SequenceStart(char Char); -int Ctdl_GetUtf8SequenceLength(char Char); -int Ctdl_Utf8StrLen(char *str); -char *Ctdl_Utf8StrCut(char *str, int maxlen); - int safestrncpy(char *dest, const char *src, size_t n); int num_tokens (const char *source, char tok); diff --git a/libcitadel/lib/stringbuf.c b/libcitadel/lib/stringbuf.c index 4498823d8..4bec25f3d 100644 --- a/libcitadel/lib/stringbuf.c +++ b/libcitadel/lib/stringbuf.c @@ -843,12 +843,16 @@ long StrECMAEscAppend(StrBuf *Target, const StrBuf *Source, const char *PlainIn) bptr = Target->buf + Target->BufUsed; } else if (*aptr == '"') { - memcpy(bptr, "\\\"", 2); - bptr += 2; + *bptr = '\\'; + bptr ++; + *bptr = '"'; + bptr ++; Target->BufUsed += 2; } else if (*aptr == '\\') { - memcpy(bptr, "\\\\", 2); - bptr += 2; + *bptr = '\\'; + bptr ++; + *bptr = '\\'; + bptr ++; Target->BufUsed += 2; } else{ @@ -859,7 +863,7 @@ long StrECMAEscAppend(StrBuf *Target, const StrBuf *Source, const char *PlainIn) aptr ++; } *bptr = '\0'; - if ((bptr = eptr - 1 ) && !IsEmptyStr(aptr) ) + if ((bptr == eptr - 1 ) && !IsEmptyStr(aptr) ) return -1; return Target->BufUsed; } @@ -2637,23 +2641,97 @@ void StrBuf_RFC822_to_Utf8(StrBuf *Target, const StrBuf *DecodeMe, const StrBuf* FreeStrBuf(&ConvertBuf2); } +/** + * \brief evaluate the length of an utf8 special character sequence + * \param Char the character to examine + * \returns width of utf8 chars in bytes + */ +static inline int Ctdl_GetUtf8SequenceLength(char *CharS, char *CharE) +{ + int n = 1; + char test = (1<<7); + + while ((n < 8) && ((test & *CharS) != 0)) { + test = test << 1; + n ++; + } + if ((n > 6) || ((CharE - CharS) > n)) + n = 1; + return n; +} +/** + * \brief detect whether this char starts an utf-8 encoded char + * \param Char character to inspect + * \returns yes or no + */ +static inline int Ctdl_IsUtf8SequenceStart(char Char) +{ +/** 11??.???? indicates an UTF8 Sequence. */ + return ((Char & 0xC0) != 0); +} +/** + * \brief measure the number of glyphs in an UTF8 string... + * \param str string to measure + * \returns the length of str + */ long StrBuf_Utf8StrLen(StrBuf *Buf) { - return Ctdl_Utf8StrLen(Buf->buf); + int n = 0; + int m = 0; + char *aptr, *eptr; + + if ((Buf == NULL) || (Buf->BufUsed == 0)) + return 0; + aptr = Buf->buf; + eptr = Buf->buf + Buf->BufUsed; + while ((aptr < eptr) && (*aptr != '\0')) { + if (Ctdl_IsUtf8SequenceStart(*aptr)){ + m = Ctdl_GetUtf8SequenceLength(aptr, eptr); + while ((aptr < eptr) && (m-- > 0) && (*aptr++ != '\0')) + n ++; + } + else { + n++; + aptr++; + } + + } + return n; } +/** + * \brief cuts a string after maxlen glyphs + * \param str string to cut to maxlen glyphs + * \param maxlen how long may the string become? + * \returns pointer to maxlen or the end of the string + */ long StrBuf_Utf8StrCut(StrBuf *Buf, int maxlen) { - char *CutAt; + char *aptr, *eptr; + int n = 0, m = 0; - CutAt = Ctdl_Utf8StrCut(Buf->buf, maxlen); - if (CutAt != NULL) { - Buf->BufUsed = CutAt - Buf->buf; - Buf->buf[Buf->BufUsed] = '\0'; + aptr = Buf->buf; + eptr = Buf->buf + Buf->BufUsed; + while ((aptr < eptr) && (*aptr != '\0')) { + if (Ctdl_IsUtf8SequenceStart(*aptr)){ + m = Ctdl_GetUtf8SequenceLength(aptr, eptr); + while ((m-- > 0) && (*aptr++ != '\0')) + n ++; + } + else { + n++; + aptr++; + } + if (n > maxlen) { + *aptr = '\0'; + Buf->BufUsed = aptr - Buf->buf; + return Buf->BufUsed; + } } - return Buf->BufUsed; + return Buf->BufUsed; + } diff --git a/libcitadel/lib/tools.c b/libcitadel/lib/tools.c index 5662133be..03d3213de 100644 --- a/libcitadel/lib/tools.c +++ b/libcitadel/lib/tools.c @@ -976,97 +976,6 @@ void stripltlen(char *buf, int *len) } } -/** - * \brief detect whether this char starts an utf-8 encoded char - * \param Char character to inspect - * \returns yes or no - */ -inline int Ctdl_IsUtf8SequenceStart(char Char) -{ -/** 11??.???? indicates an UTF8 Sequence. */ - return ((Char & 0xC0) != 0); -} - -/** - * \brief evaluate the length of an utf8 special character sequence - * \param Char the character to examine - * \returns width of utf8 chars in bytes - */ -inline int Ctdl_GetUtf8SequenceLength(char Char) -{ - int n = 1; - char test = (1<<7); - - while ((n < 8) && ((test & Char) != 0)) { - test = test << 1; - n ++; - } - if (n > 6) - n = 1; - return n; -} - -/** - * \brief measure the number of glyphs in an UTF8 string... - * \param str string to measure - * \returns the length of str - */ -int Ctdl_Utf8StrLen(char *str) -{ - int n = 0; - int m = 0; - char *aptr; - - if (str == NULL) - return n; - aptr = str; - while (*aptr != '\0') { - if (Ctdl_IsUtf8SequenceStart(*aptr)){ - m = Ctdl_GetUtf8SequenceLength(*aptr); - while ((m-- > 0) && (*aptr++ != '\0')) - n ++; - } - else { - n++; - aptr++; - } - - } - return n; -} - -/** - * \brief cuts a string after maxlen glyphs - * \param str string to cut to maxlen glyphs - * \param maxlen how long may the string become? - * \returns pointer to maxlen or the end of the string - */ -char *Ctdl_Utf8StrCut(char *str, int maxlen) -{ - int n = 0, m = 0; - char *aptr; - - if (str == NULL) - return NULL; - aptr = str; - while (*aptr != '\0') { - if (Ctdl_IsUtf8SequenceStart(*aptr)){ - m = Ctdl_GetUtf8SequenceLength(*aptr); - while ((m-- > 0) && (*aptr++ != '\0')) - n ++; - } - else { - n++; - aptr++; - } - if (n > maxlen) { - *aptr = '\0'; - return aptr; - } - } - return aptr; -} - /* * Convert all whitespace characters in a supplied string to underscores