* Fixed striplt() to only remove leading and trailing blanks, but no others. It...

[citadel.git] / libcitadel / lib / tools.c
diff --git a/libcitadel/lib/tools.c b/libcitadel/lib/tools.c

index 606449da1d6ae270244b6b005025b6498e4a5c21..14cf43ec969c8ec8aa16210cea2e0c5253150fae 100644 (file)
--- a/libcitadel/lib/tools.c
+++ b/libcitadel/lib/tools.c
@@ -73,8 +73,16 @@ const byte dtable[256] = {
         128, 128, 0
  };
  
-
-char *safestrncpy(char *dest, const char *src, size_t n)
+/*
+ * Copy a string into a buffer of a known size; stop (and NUL-terminate) once the limit is reached.
+ *
+ * dest        the target buffer
+ * src the source string
+ * n   the size of dest
+ *
+ * returns the number of characters copied if dest is big enough, -n if not.
+ */
+int safestrncpy(char *dest, const char *src, size_t n)
  {
         int i = 0;
  
@@ -85,11 +93,11 @@ char *safestrncpy(char *dest, const char *src, size_t n)
  
         do {
                 dest[i] = src[i];
-               if (dest[i] == 0) return(dest);
+               if (dest[i] == 0) return i;
                 ++i;
         } while (i<n);
         dest[n - 1] = 0;
-       return dest;
+       return -i;
  }
  
  
@@ -438,7 +446,7 @@ char *rfc2047encode(char *line, long length)
                 return strdup(line);
         }
  
-       result = (char*) malloc(strlen(UTF8_HEADER) + 4 + length * 2);
+       result = (char*) malloc(sizeof(UTF8_HEADER) + 4 + length * 2);
         strncpy (result, UTF8_HEADER, strlen (UTF8_HEADER));
         CtdlEncodeBase64(result + strlen(UTF8_HEADER), line, length, 0);
         end = strlen (result);
@@ -452,26 +460,35 @@ char *rfc2047encode(char *line, long length)
  /*
   * Strip leading and trailing spaces from a string
   */
-void striplt(char *buf)
-{
-       size_t len;
-       int a;
+size_t striplt(char *buf) {
+       char *first_nonspace = NULL;
+       char *last_nonspace = NULL;
+       char *ptr;
+       size_t new_len = 0;
  
-       if (buf==NULL) return;
-       if (IsEmptyStr(buf)) return;
-       len = strlen(buf);
-        while ((!IsEmptyStr(buf)) && (isspace(buf[len - 1])))
-                buf[--len] = 0;
-       if (IsEmptyStr(buf)) return;
-       a = 0;
-        while ((!IsEmptyStr(buf)) && (isspace(buf[a])))
-               a++;
-       if (a > 0)
-                memmove(buf, &buf[a], len - a + 1);
-}
+       if (!buf) {
+               return 0;
+       }
  
+       for (ptr=buf; *ptr!=0; ++ptr) {
+               if (!isspace(*ptr)) {
+                       if (!first_nonspace) {
+                               first_nonspace = ptr;
+                       }
+                       last_nonspace = ptr;
+               }
+       }
  
+       if ((!first_nonspace) || (!last_nonspace)) {
+               buf[0] = 0;
+               return 0;
+       }
  
+       new_len = last_nonspace - first_nonspace + 1;
+       memcpy(buf, first_nonspace, new_len);
+       buf[new_len] = 0;
+       return new_len;
+}
  
  
  /**
@@ -547,7 +564,7 @@ void fmt_date(char *buf, size_t n, time_t thetime, int seconds) {
   * Determine whether the specified message number is contained within the
   * specified sequence set.
   */
-int is_msg_in_sequence_set(char *mset, long msgnum) {
+int is_msg_in_sequence_set(const char *mset, long msgnum) {
         int num_sets;
         int s;
         char setstr[128], lostr[128], histr[128];
@@ -640,7 +657,7 @@ char *memreadlinelen(char *start, char *buf, int maxlen, int *retlen)
   * Strip a boundarized substring out of a string (for example, remove
   * parentheses and anything inside them).
   */
-void stripout(char *str, char leftboundary, char rightboundary) {
+int stripout(char *str, char leftboundary, char rightboundary) {
         int a;
          int lb = (-1);
          int rb = (-1);
@@ -652,12 +669,14 @@ void stripout(char *str, char leftboundary, char rightboundary) {
  
          if ( (lb > 0) && (rb > lb) ) {
                  strcpy(&str[lb - 1], &str[rb + 1]);
+               return 1;
          }
  
          else if ( (lb == 0) && (rb > lb) ) {
                  strcpy(str, &str[rb + 1]);
+               return 1;
          }
-
+       return 0;
  }
  
  
@@ -757,17 +776,15 @@ void generate_uuid(char *buf) {
   * The code is roughly based on the strstr() replacement from 'tin' written
   * by Urs Jannsen.
   */
-char *bmstrcasestr(char *text, char *pattern) {
+inline char *_bmstrcasestr_len(char *text, size_t textlen, char *pattern, size_t patlen) {
  
         register unsigned char *p, *t;
         register int i, j, *delta;
         register size_t p1;
         int deltaspace[256];
-       size_t textlen;
-       size_t patlen;
  
-       textlen = strlen (text);
-       patlen = strlen (pattern);
+       if (!text) return(NULL);
+       if (!pattern) return(NULL);
  
         /* algorithm fails if pattern is empty */
         if ((p1 = patlen) == 0)
@@ -810,7 +827,29 @@ char *bmstrcasestr(char *text, char *pattern) {
         return (NULL);
  }
  
+/*
+ * bmstrcasestr() -- case-insensitive substring search
+ *
+ * This uses the Boyer-Moore search algorithm and is therefore quite fast.
+ * The code is roughly based on the strstr() replacement from 'tin' written
+ * by Urs Jannsen.
+ */
+char *bmstrcasestr(char *text, char *pattern) {
+       size_t textlen;
+       size_t patlen;
+
+       if (!text) return(NULL);
+       if (!pattern) return(NULL);
+
+       textlen = strlen (text);
+       patlen = strlen (pattern);
  
+       return _bmstrcasestr_len(text, textlen, pattern, patlen);
+}
+
+char *bmstrcasestr_len(char *text, size_t textlen, char *pattern, size_t patlen) {
+       return _bmstrcasestr_len(text, textlen, pattern, patlen);
+}
  
  /*
   * Local replacement for controversial C library function that generates
@@ -820,7 +859,7 @@ void CtdlMakeTempFileName(char *name, int len) {
         int i = 0;
  
         while (i++, i < 100) {
-               snprintf(name, len, "/tmp/ctdl.%4lx.%04x",
+               snprintf(name, len, "/tmp/ctdl.%04lx.%04x",
                         (long)getpid(),
                         rand()
                 );
@@ -836,7 +875,7 @@ void CtdlMakeTempFileName(char *name, int len) {
   * Determine whether the specified message number is contained within the specified set.
   * Returns nonzero if the specified message number is in the specified message set string.
   */
-int is_msg_in_mset(char *mset, long msgnum) {
+int is_msg_in_mset(const char *mset, long msgnum) {
         int num_sets;
         int s;
         char setstr[SIZ], lostr[SIZ], histr[SIZ];       /* was 1024 */
@@ -870,10 +909,8 @@ int is_msg_in_mset(char *mset, long msgnum) {
  
  
  /*
- * \brief searches for a  paternn within asearch string
- * \param search the string to search 
- * \param patn the pattern to find in string
- * \returns position in string
+ * searches for a pattern within a search string
+ * returns position in string
   */
  int pattern2(char *search, char *patn)
  {
@@ -889,10 +926,10 @@ int pattern2(char *search, char *patn)
  }
  
  
-/**
- * \brief Strip leading and trailing spaces from a string; with premeasured and adjusted length.
- * \param buf the string to modify
- * \param len length of the string. 
+/*
+ * Strip leading and trailing spaces from a string; with premeasured and adjusted length.
+ * buf - the string to modify
+ * len - length of the string. 
   */
  void stripltlen(char *buf, int *len)
  {
@@ -910,3 +947,114 @@ void stripltlen(char *buf, int *len)
         }
  }
  
+/**
+ * \brief detect whether this char starts a UTF-8 encoded character
+ * \param Char character to inspect
+ * \returns yes or no
+ */
+inline int Ctdl_IsUtf8SequenceStart(char Char)
+{
+/** 11??.???? indicates a UTF-8 sequence. NOTE(review): the test below is true if either of the top two bits is set -- confirm. */
+       return ((Char & 0xC0) != 0);
+}
+
+/**
+ * \brief evaluate the length of a UTF-8 special character sequence
+ * \param Char the character to examine
+ * \returns width of the UTF-8 character in bytes
+ */
+inline int Ctdl_GetUtf8SequenceLength(char Char)
+{
+       int n = 1;
+        char test = (1<<7);
+       
+       while ((n < 8) && ((test & Char) != 0)) {
+               test = test << 1;
+               n ++;
+       }
+       if (n > 6)
+               n = 1;
+       return n;
+}
+
+/**
+ * \brief measure the number of glyphs in a UTF-8 string...
+ * \param str string to measure
+ * \returns the length of str
+ */
+int Ctdl_Utf8StrLen(char *str)
+{
+       int n = 0;
+       int m = 0;
+       char *aptr;
+
+       if (str == NULL)
+               return n;
+       aptr = str;
+       while (*aptr != '\0') {
+               if (Ctdl_IsUtf8SequenceStart(*aptr)){
+                       m = Ctdl_GetUtf8SequenceLength(*aptr);
+                       while ((m-- > 0) && (*aptr++ != '\0'))
+                               n ++;
+               }
+               else {
+                       n++;
+                       aptr++;
+               }
+                       
+       }
+       return n;
+}
+
+/**
+ * \brief cuts a string after maxlen glyphs
+ * \param str string to cut to maxlen glyphs
+ * \param maxlen how long may the string become?
+ * \returns pointer to the position where the string was cut, or to the end of the string (NULL if str is NULL)
+ */
+char *Ctdl_Utf8StrCut(char *str, int maxlen)
+{
+       int n = 0, m = 0;
+       char *aptr;
+
+       if (str == NULL)
+               return NULL;
+       aptr = str;
+       while (*aptr != '\0') {
+               if (Ctdl_IsUtf8SequenceStart(*aptr)){
+                       m = Ctdl_GetUtf8SequenceLength(*aptr);
+                       while ((m-- > 0) && (*aptr++ != '\0'))
+                               n ++;
+               }
+               else {
+                       n++;
+                       aptr++;
+               }
+               if (n > maxlen) {
+                       *aptr = '\0';
+                       return aptr;
+               }                       
+       }
+       return aptr;
+}
+
+
+/*
+ * Convert all whitespace characters in a supplied string to underscores
+ */
+void convert_spaces_to_underscores(char *str)
+{
+       int len;
+       int i;
+
+       if (!str) return;
+
+       len = strlen(str);
+       for (i=0; i<len; ++i) {
+               if (isspace(str[i])) {
+                       str[i] = '_';
+               }
+       }
+}
+
+