Began making changes to do better handling of character sets.

[citadel.git] / webcit / tools.c
diff --git a/webcit/tools.c b/webcit/tools.c

index 6a2bad457162b5458809fcf46b18896b8dc7d843..0a0718381d7ae3d388ea1c6fdb7ac45b8d2d0669 100644 (file)
--- a/webcit/tools.c
+++ b/webcit/tools.c
@@ -1,50 +1,33 @@
  /*
- * tools.c -- Miscellaneous routines 
+ * $Id$
+ */
+/**
+ * \defgroup MiscRout Miscellaneous routines 
+ * \ingroup tools
   */
  
-#include <ctype.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <stdio.h>
-#include <fcntl.h>
-#include <signal.h>
-#include <sys/types.h>
-#include <sys/wait.h>
-#include <sys/socket.h>
-#include <sys/time.h>
-#include <limits.h>
-#include <netinet/in.h>
-#include <netdb.h>
-#include <string.h>
-#include <pwd.h>
-#include <errno.h>
-#include <stdarg.h>
-#include <pthread.h>
-#include <signal.h>
-#include <sys/time.h>
+/*@{*/
  #include "webcit.h"
  #include "webserver.h"
  
-typedef unsigned char byte;
-
-#define FALSE 0
-#define TRUE 1
  
-char *ascmonths[] = {
-       "Jan", "Feb", "Mar", "Apr", "May", "Jun",
-       "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
-};
+typedef unsigned char byte; /**< byte data type */
  
-char *ascdays[] = {
-       "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
-};
+#define FALSE 0 /**< boolean false */
+#define TRUE 1  /**< boolean true */
  
-static byte dtable[256];       /* base64 encode / decode table */
+static byte dtable[256];       /**< base64 encode / decode table */
  
+/**
+ * \brief strncpy() wrapper that aborts on NULL arguments.
+ * \param dest destination buffer
+ * \param src source string
+ * \param n maximum number of bytes to copy (passed to strncpy())
+ * \return result string
+ */
  char *safestrncpy(char *dest, const char *src, size_t n)
  {
         if (dest == NULL || src == NULL) {
-               lprintf(1, "safestrncpy: NULL argument\n");
                 abort();
         }
         strncpy(dest, src, n);
@@ -54,12 +37,15 @@ char *safestrncpy(char *dest, const char *src, size_t n)
  
  
  
-/*
- * num_tokens()  -  discover number of parameters/tokens in a string
+/**
+ * \brief discover number of parameters/tokens in a string
+ * \param source string to inspect
+ * \param tok separator character
+ * \return number of tokenized parts found
   */
  int num_tokens(char *source, char tok)
  {
-       int a;
+       int a = 0;
         int count = 1;
  
         if (source == NULL)
@@ -71,37 +57,51 @@ int num_tokens(char *source, char tok)
         return (count);
  }
  
-/*
- * extract_token()  -  a smarter string tokenizer
+/**
+ * \brief a string tokenizer
+ * \param dest destination string; receives the requested token (left empty if the token is not found)
+ * \param source the string to grab tokens from
+ * \param parmnum the n'th token to grab (tokens are counted from zero)
+ * \param separator the character that separates tokens
+ * \param maxlen the length of dest; at most maxlen-1 characters are copied
   */
-void extract_token(char *dest, char *source, int parmnum, char separator)
+void extract_token(char *dest, const char *source, int parmnum, char separator, int maxlen)
  {
-       int i;
-       int len;
-       int curr_parm;
-
-       strcpy(dest, "");
-       len = 0;
-       curr_parm = 0;
-
-       if (strlen(source) == 0) {
-               return;
+       char *d;                /* write cursor into dest */
+       const char *s;          /* read cursor into source */
+       int count = 0;          /* separators passed so far */
+       int len = 0;            /* characters accounted for in dest */
+
+       dest[0] = 0;            /* result is empty unless a token is copied below */
+
+       /* Locate desired parameter: skip past parmnum separators */
+       s = source;
+       while (count < parmnum) {
+               /* End of string, bail! */
+               if (!*s) {
+                       s = NULL;
+                       break;
+               }
+               if (*s == separator) {
+                       count++;
+               }
+               s++;
          }
+       if (!s) return;         /* Parameter not found */
  
-       for (i = 0; i < strlen(source); ++i) {
-               if (source[i] == separator) {
-                       ++curr_parm;
-               } else if (curr_parm == parmnum) {
-                       dest[len + 1] = 0;
-                       dest[len++] = source[i];
-               }
+       for (d = dest; *s && *s != separator && ++len<maxlen; s++, d++) {
+               *d = *s;        /* copy until separator, end of source, or dest is full */
          }
+       *d = 0;                 /* terminate the copied token */
  }
  
  
  
-/*
- * remove_token()  -  a tokenizer that kills, maims, and destroys
+/**
+ * \brief a tokenizer that kills, maims, and destroys
+ * \param source the string to process
+ * \param parmnum which token to kill
+ * \param separator the tokenizer string
   */
  void remove_token(char *source, int parmnum, char separator)
  {
@@ -142,26 +142,32 @@ void remove_token(char *source, int parmnum, char separator)
  
  
  
-/*
- * extract_int()  -  extract an int parm w/o supplying a buffer
+/**
+ * \brief extract an int parm w/o supplying a buffer
+ * \param source the '|'-separated string to locate the int in
+ * \param parmnum the n'th token to grab the int from
+ * \return the token converted with atoi()
   */
-int extract_int(char *source, int parmnum)
+int extract_int(const char *source, int parmnum)
  {
-       char buf[SIZ];
-
-       extract_token(buf, source, parmnum, '|');
-       return (atoi(buf));
+       char buf[32];           /* scratch space for the token text; longer tokens are truncated */
+       
+       extract_token(buf, source, parmnum, '|', sizeof buf);
+       return(atoi(buf));
  }
  
-/*
- * extract_long()  -  extract an long parm w/o supplying a buffer
+/**
+ * \brief extract a long parm w/o supplying a buffer
+ * \param source the '|'-separated string to examine
+ * \param parmnum the n'th token to read the long from
+ * \return the token converted with atol()
   */
-long extract_long(char *source, long int parmnum)
+long extract_long(const char *source, int parmnum)
  {
-       char buf[SIZ];
-
-       extract_token(buf, source, parmnum, '|');
-       return (atol(buf));
+       char buf[32];           /* scratch space for the token text; longer tokens are truncated */
+       
+       extract_token(buf, source, parmnum, '|', sizeof buf);
+       return(atol(buf));
  }
  
  
@@ -169,21 +175,13 @@ long extract_long(char *source, long int parmnum)
  
  
  
-
-
-
-
-
-
-
-
-
-/*
- * check for the presence of a character within a string (returns count)
+/**
+ * \brief check for the presence of a character within a string (returns count)
+ * \param st the string to examine
+ * \param ch the char to search
+ * \return the number of times ch occurs in st
   */
-int haschar(st, ch)
-char st[];
-char ch;
+int haschar(char *st,char ch)
  {
         int a, b;
         b = 0;
@@ -194,86 +192,18 @@ char ch;
  }
  
  
-/*
- * Format a date/time stamp for output 
- */
-void fmt_date(char *buf, time_t thetime)
-{
-       struct tm *tm;
-       int hour;
-
-       strcpy(buf, "");
-       tm = localtime(&thetime);
-       hour = tm->tm_hour;
-       if (hour == 0)
-               hour = 12;
-       else if (hour > 12)
-               hour = hour - 12;
-
-       sprintf(buf, "%s %d %d %2d:%02d%s",
-               ascmonths[tm->tm_mon],
-               tm->tm_mday,
-               tm->tm_year + 1900,
-               hour, tm->tm_min, ((tm->tm_hour > 12) ? "pm" : "am")
-           );
-}
-
-
-
-/*
- * Format TIME ONLY for output 
- */
-void fmt_time(char *buf, time_t thetime)
-{
-       struct tm *tm;
-       int hour;
-
-       strcpy(buf, "");
-       tm = localtime(&thetime);
-       hour = tm->tm_hour;
-       if (hour == 0)
-               hour = 12;
-       else if (hour > 12)
-               hour = hour - 12;
-
-       sprintf(buf, "%d:%02d%s",
-               hour, tm->tm_min, ((tm->tm_hour > 12) ? "pm" : "am")
-           );
-}
-
-
-
-
-/*
- * Format a date/time stamp to the format used in HTTP headers
- */
-void httpdate(char *buf, time_t thetime)
-{
-       struct tm *tm;
-
-       strcpy(buf, "");
-       tm = localtime(&thetime);
-
-       sprintf(buf, "%s, %02d %s %4d %02d:%02d:%02d",
-               ascdays[tm->tm_wday],
-               tm->tm_mday,
-               ascmonths[tm->tm_mon],
-               tm->tm_year + 1900, tm->tm_hour, tm->tm_min, tm->tm_sec);
-}
-
-
-
-
-
-/*
- * Utility function to "readline" from memory
- * (returns new pointer)
+/** 
+ * \brief Utility function to "readline" from memory
+ * \param start Location in memory from which we are reading.
+ * \param buf the buffer to place the string in.
+ * \param maxlen Size of string buffer
+ * \return Pointer to the source memory right after we stopped reading.
   */
  char *memreadline(char *start, char *buf, int maxlen)
  {
         char ch;
         char *ptr;
-       int len = 0;            /* tally our own length to avoid strlen() delays */
+       int len = 0;            /**< tally our own length to avoid strlen() delays */
  
         ptr = start;
         memset(buf, 0, maxlen);
@@ -293,8 +223,11 @@ char *memreadline(char *start, char *buf, int maxlen)
  
  
  
-/*
- * pattern2()  -  searches for patn within search string, returns pos
+/**
+ * \brief searches for a pattern within a search string
+ * \param search the string to search 
+ * \param patn the pattern to find in string
+ * \returns position in string
   */
  int pattern2(char *search, char *patn)
  {
@@ -307,24 +240,31 @@ int pattern2(char *search, char *patn)
  }
  
  
-/*
- * Strip leading and trailing spaces from a string
+/**
+ * \brief Strip leading and trailing whitespace (as classified by isspace()) from a string
+ * \param buf the NUL-terminated string to modify in place (NOTE(review): leading characters are dropped via strcpy() on overlapping memory -- confirm this is safe or use memmove())
   */
  void striplt(char *buf)
  {
+       if (strlen(buf) == 0) return;   /* empty input: nothing to strip */
         while ((strlen(buf) > 0) && (isspace(buf[0])))
                 strcpy(buf, &buf[1]);
+       if (strlen(buf) == 0) return;   /* string was all whitespace; avoid indexing buf[-1] below */
         while (isspace(buf[strlen(buf) - 1]))
                 buf[strlen(buf) - 1] = 0;
  }
  
  
-/*
- * Determine whether the specified message number is contained within the
+/**
+ * \brief Determine whether the specified message number is contained within the
   * specified set.
+ *
+ * \param mset Message set string
+ * \param msgnum Message number we are looking for
+ *
+ * \return Nonzero if the specified message number is in the specified message set string.
   */
-int is_msg_in_mset(char *mset, long msgnum)
-{
+int is_msg_in_mset(char *mset, long msgnum) {
         int num_sets;
         int s;
         char setstr[SIZ], lostr[SIZ], histr[SIZ];       /* was 1024 */
@@ -334,35 +274,39 @@ int is_msg_in_mset(char *mset, long msgnum)
          * Now set it for all specified messages.
          */
         num_sets = num_tokens(mset, ',');
-       for (s = 0; s < num_sets; ++s) {
-               extract_token(setstr, mset, s, ',');
+       for (s=0; s<num_sets; ++s) {
+               extract_token(setstr, mset, s, ',', sizeof setstr);
  
-               extract_token(lostr, setstr, 0, ':');
+               extract_token(lostr, setstr, 0, ':', sizeof lostr);
                 if (num_tokens(setstr, ':') >= 2) {
-                       extract_token(histr, setstr, 1, ':');
+                       extract_token(histr, setstr, 1, ':', sizeof histr);
                         if (!strcmp(histr, "*")) {
-                               snprintf(histr, sizeof histr, "%ld",
-                                        LONG_MAX);
+                               snprintf(histr, sizeof histr, "%ld", LONG_MAX);
                         }
-               } else {
+               } 
+               else {
                         strcpy(histr, lostr);
                 }
                 lo = atol(lostr);
                 hi = atol(histr);
  
-               if ((msgnum >= lo) && (msgnum <= hi))
-                       return (1);
+               if ((msgnum >= lo) && (msgnum <= hi)) return(1);
         }
  
-       return (0);
+       return(0);
  }
  
  
-/*
- * Strip a boundarized substring out of a string (for example, remove
+
+/**
+ * \brief Strip a boundarized substring out of a string
+ * (for example, remove
   * parentheses and anything inside them).
   *
   * This improved version can strip out *multiple* boundarized substrings.
+ * \param str the string to process
+ * \param leftboundary the boundary character on the left side of the target string 
+ * \param rightboundary the boundary character on the right side of the target string
   */
  void stripout(char *str, char leftboundary, char rightboundary)
  {
@@ -391,8 +335,9 @@ void stripout(char *str, char leftboundary, char rightboundary)
  
  
  
-/*
- * Replacement for sleep() that uses select() in order to avoid SIGALRM
+/**
+ * \brief Replacement for sleep() that uses select() in order to avoid SIGALRM
+ * \param seconds how many seconds should we sleep?
   */
  void sleeeeeeeeeep(int seconds)
  {
@@ -405,9 +350,13 @@ void sleeeeeeeeeep(int seconds)
  
  
  
-/*
+/**
+ * \brief encode data into base64, for example to tunnel it through mail transport
   * CtdlDecodeBase64() and CtdlEncodeBase64() are adaptations of code by
   * John Walker, copied over from the Citadel server.
+ * \param dest buffer that receives the base64-encoded output
+ * \param source the data to encode
+ * \param sourcelen the length of the source data (may contain string terminators)
   */
  
  void CtdlEncodeBase64(char *dest, const char *source, size_t sourcelen)
@@ -417,7 +366,7 @@ void CtdlEncodeBase64(char *dest, const char *source, size_t sourcelen)
         int dpos = 0;
         int thisline = 0;
  
-       /*  Fill dtable with character encodings.  */
+       /**  Fill dtable with character encodings.  */
  
         for (i = 0; i < 26; i++) {
                 dtable[i] = 'A' + i;
@@ -452,9 +401,11 @@ void CtdlEncodeBase64(char *dest, const char *source, size_t sourcelen)
                                    (igroup[2] >> 6)];
                         ogroup[3] = dtable[igroup[2] & 0x3F];
  
-                       /* Replace characters in output stream with "=" pad
-                          characters if fewer than three characters were
-                          read from the end of the input stream. */
+                       /**
+                        * Replace characters in output stream with "=" pad
+                        * characters if fewer than three characters were
+                        * read from the end of the input stream. 
+                        */
  
                         if (n < 3) {
                                 ogroup[3] = '=';
@@ -484,9 +435,14 @@ void CtdlEncodeBase64(char *dest, const char *source, size_t sourcelen)
  }
  
  
-/* 
- * Convert base64-encoded to binary.  Returns the length of the decoded data.
+/**
+ * \brief Convert base64-encoded to binary.  
   * It will stop after reading 'length' bytes.
+ *
+ * \param dest The destination buffer 
+ * \param source The base64 data to be decoded.
+ * \param length The number of bytes to decode.
+ * \return The actual length of the decoded data.
   */
  int CtdlDecodeBase64(char *dest, const char *source, size_t length)
  {
@@ -510,7 +466,7 @@ int CtdlDecodeBase64(char *dest, const char *source, size_t length)
         dtable['/'] = 63;
         dtable['='] = 0;
  
-        /*CONSTANTCONDITION*/ while (TRUE) {
+       /**CONSTANTCONDITION*/ while (TRUE) {
                 byte a[4], b[4], o[3];
  
                 for (i = 0; i < 4; i++) {
@@ -526,7 +482,7 @@ int CtdlDecodeBase64(char *dest, const char *source, size_t length)
                                 return (dpos);
                         }
                         if (dtable[c] & 0x80) {
-                               /* Ignoring errors: discard invalid character. */
+                               /** Ignoring errors: discard invalid character */
                                 i--;
                                 continue;
                         }
@@ -549,3 +505,114 @@ int CtdlDecodeBase64(char *dest, const char *source, size_t length)
                 }
         }
  }
+
+
+
+/**
+ * \brief Build a unique identifier string for a calendar (or similar) object
+ *
+ * The result has the form "<nodename>-<time>-<pid>-<counter>", numbers in hex.
+ * \param buf Caller-supplied buffer that receives the identifier (its size is not checked)
+ */
+void generate_uuid(char *buf) {
+       static int seq = 0;     /* call counter, bumped on every invocation */
+       const long now = (long)time(NULL);
+       const int serial = seq++;
+
+       sprintf(buf, "%s-%lx-%x-%x",
+               serv_info.serv_nodename, now, getpid(), serial
+       );
+}
+
+
+/**
+ * \brief Local replacement for controversial C library function that generates
+ * names for temporary files.  Included to shut up compiler warnings.
+ * \todo return a fd to the file instead of the name for security reasons
+ * \param name buffer that receives the generated filename
+ * \param len the size of the name buffer
+ */
+void CtdlMakeTempFileName(char *name, int len) {
+       int i = 0;
+
+       while (i++, i < 100) {
+               snprintf(name, len, "/tmp/ctdl.%04x.%04x",
+                       getpid(), rand()
+               );
+               /* access() returns 0 when the file exists; stop at the first unused name */
+               if (access(name, F_OK) != 0) {
+                       return;
+               }
+       }       /* all 99 candidates were taken: the last one is left in name */
+}
+
+
+
+/**
+ * \brief      case-insensitive substring search
+ *
+ *             This uses the Boyer-Moore search algorithm and is therefore quite fast.
+ *             Roughly based on the strstr() replacement from 'tin' written by Urs Jannsen.
+ *
+ * \param      text    String to be searched
+ * \param      pattern String to search for
+ * \return     Pointer to the first match inside text (text itself if pattern is empty), or NULL if there is no match
+ */
+char *bmstrcasestr(char *text, char *pattern) {
+
+       register unsigned char *p, *t;
+       register int i, j, *delta;
+       register size_t p1;
+       int deltaspace[256];
+       size_t textlen;
+       size_t patlen;
+
+       textlen = strlen (text);
+       patlen = strlen (pattern);
+
+       /* algorithm fails if pattern is empty */
+       if ((p1 = patlen) == 0)
+               return (text);
+
+       /* code below fails (whenever i is unsigned) if pattern too long */
+       if (p1 > textlen)
+               return (NULL);
+
+       /* set up deltas: default shift is the pattern length; every pattern char except the last maps to its distance from the pattern end */
+       delta = deltaspace;
+       for (i = 0; i <= 255; i++)
+               delta[i] = p1;
+       for (p = (unsigned char *) pattern, i = p1; --i > 0;)
+               delta[tolower(*p++)] = i;
+
+       /*
+        * From now on, we want patlen - 1.
+        * In the loop below, p points to the end of the pattern,
+        * t points to the end of the text to be tested against the
+        * pattern, and i counts the amount of text remaining, not
+        * including the part to be tested.
+        */
+       p1--;
+       p = (unsigned char *) pattern + p1;
+       t = (unsigned char *) text + p1;
+       i = textlen - patlen;
+       while(1) {
+               if (tolower(p[0]) == tolower(t[0])) {
+                       if (strncasecmp ((const char *)(p - p1), (const char *)(t - p1), p1) == 0) {
+                               return ((char *)t - p1);
+                       }
+               }
+               j = delta[tolower(t[0])];
+               if (i < j)
+                       break;
+               i -= j;
+               t += j;
+       }
+       return (NULL);
+}
+
+
+
+
+
+/*@}*/