/*
 * UTF-8 string helpers for libcitadel (libcitadel/lib/tools.c).
 *
 * Provenance: commit 476c203352976c03bf273053782a92d3d7e90be9,
 * Wilfried Göesgens, Wed, 26 Mar 2008 00:03:12 +0000,
 * "add functions to evaluate utf8 strings as described at the instructables".
 *
 * Public prototypes, declared in the "tools" section of
 * libcitadel/lib/libcitadel.h:
 *
 *	int Ctdl_IsUtf8SequenceStart(char Char);
 *	int Ctdl_GetUtf8SequenceLength(char Char);
 *	int Ctdl_Utf8StrLen(char *str);
 *	char *Ctdl_Utf8StrCut(char *str, int maxlen);
 *
 * In tools.c these functions sit between stripltlen() and
 * convert_spaces_to_underscores().
 */

/**
 * \brief detect whether this char starts an utf-8 encoded multi-byte char
 * \param Char character to inspect
 * \returns 1 if Char has the bit pattern 11xx.xxxx, 0 otherwise
 *
 * Plain ASCII (0xxx.xxxx) and continuation bytes (10xx.xxxx) are not
 * sequence starts.  The byte is inspected as unsigned char because the
 * signedness of plain char is implementation-defined.
 */
int Ctdl_IsUtf8SequenceStart(char Char)
{
	return (((unsigned char)Char & 0xC0) == 0xC0);
}

/**
 * \brief evaluate the length of an utf8 special character sequence
 * \param Char the first byte of the sequence
 * \returns width of the utf8 char in bytes (2..6), or 1 if Char is plain
 *          ASCII, a continuation byte, or not a valid lead byte
 *
 * The length of a sequence is the number of leading 1 bits of its lead
 * byte.  Lengths 5 and 6 (the pre-RFC 3629 forms) are still accepted, as
 * in the original implementation.
 */
int Ctdl_GetUtf8SequenceLength(char Char)
{
	unsigned char byte = (unsigned char)Char;
	unsigned char mask = 0x80;
	int ones = 0;

	/* walk from the most significant bit downwards */
	while ((mask != 0) && ((byte & mask) != 0)) {
		ones++;
		mask >>= 1;
	}
	if ((ones < 2) || (ones > 6)) {
		return 1;
	}
	return ones;
}

/**
 * \brief step over one glyph
 * \param pos start of a glyph; must not point at the terminating NUL
 * \returns pointer to the first byte after the glyph
 *
 * Only real continuation bytes (10xx.xxxx) are consumed after a lead byte,
 * so a truncated or malformed sequence never swallows the following
 * characters and never steps past the string terminator.
 */
static char *utf8_skip_glyph(char *pos)
{
	int remaining = 1;

	if (Ctdl_IsUtf8SequenceStart(*pos)) {
		remaining = Ctdl_GetUtf8SequenceLength(*pos);
	}
	pos++;
	remaining--;
	while ((remaining > 0) && (((unsigned char)*pos & 0xC0) == 0x80)) {
		pos++;
		remaining--;
	}
	return pos;
}

/**
 * \brief measure the number of glyphs in an UTF8 string...
 * \param str string to measure; may be NULL
 * \returns the length of str in glyphs (not bytes); 0 for NULL
 *
 * Stray continuation bytes and truncated sequences count as one glyph each.
 */
int Ctdl_Utf8StrLen(char *str)
{
	int glyphs = 0;
	char *pos = str;

	if (str == NULL) {
		return 0;
	}
	while (*pos != '\0') {
		pos = utf8_skip_glyph(pos);
		glyphs++;
	}
	return glyphs;
}

/**
 * \brief cuts a string after maxlen glyphs
 * \param str string to cut to maxlen glyphs; modified in place; may be NULL
 * \param maxlen how long may the string become? (values <= 0 yield "")
 * \returns pointer to the new end of the string (its terminating NUL),
 *          or NULL if str is NULL
 *
 * The string is only written to when it actually has to be shortened, so
 * a string that already fits is left untouched.
 */
char *Ctdl_Utf8StrCut(char *str, int maxlen)
{
	int glyphs = 0;
	char *pos = str;

	if (str == NULL) {
		return NULL;
	}
	while ((*pos != '\0') && (glyphs < maxlen)) {
		pos = utf8_skip_glyph(pos);
		glyphs++;
	}
	if (*pos != '\0') {
		*pos = '\0';
	}
	return pos;
}