X-Git-Url: https://code.citadel.org/?a=blobdiff_plain;f=libcitadel%2Flib%2Ftools.c;h=df2f1b42d1086e434a3c254ebca21fe217a3047a;hb=1aa2da0249792527f99172681aecc77e0ad086af;hp=60168fe91e0ca0cb3cd8c21cc38d775623b43515;hpb=0b4754a432708fec0328414fe4adff19c70c3855;p=citadel.git

diff --git a/libcitadel/lib/tools.c b/libcitadel/lib/tools.c
index 60168fe91..df2f1b42d 100644
--- a/libcitadel/lib/tools.c
+++ b/libcitadel/lib/tools.c
@@ -1,6 +1,22 @@
 /*
-  * A basic toolset containing miscellaneous functions for string manipluation,
+  * A basic toolset containing miscellaneous functions for string manipulation,
  * encoding/decoding, and a bunch of other stuff.
+ *
+ * Copyright (c) 1987-2011 by the citadel.org team
+ *
+ * This program is open source software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 
 
@@ -371,7 +387,6 @@ size_t CtdlEncodeBase64(char *dest, const char *source, size_t sourcelen, int li
 		dest[dpos++] = '\r';
 		dest[dpos++] = '\n';
 		dest[dpos] = 0;
-		thisline = 0;
 	}
 
 	return(dpos);
@@ -430,9 +445,9 @@ int CtdlDecodeBase64(char *dest, const char *source, size_t length)
 /*
  * if we send out non ascii subjects, we encode it this way.
  */
-char *rfc2047encode(char *line, long length)
+char *rfc2047encode(const char *line, long length)
 {
-	char *AlreadyEncoded;
+	const char *AlreadyEncoded;
 	char *result;
 	long end;
 #define UTF8_HEADER "=?UTF-8?B?"
@@ -456,39 +471,77 @@ char *rfc2047encode(char *line, long length)
 	return result;
 }
 
-
 /*
- * Strip leading and trailing spaces from a string
+ * collapses every run of slashes in a pathname into a single slash, in place;
+ * if TrailingSlash is set, a non-empty result gets a '/' appended (may grow the string by one byte)
  */
-void striplt(char *buf)
+void StripSlashes(char *Dir, int TrailingSlash)
 {
-	size_t len;
-	int a;
+	char *a, *b;
+
+	a = b = Dir;
+
+	while (!IsEmptyStr(a)) {
+		if (*a == '/') {
+			while (*a == '/')
+				a++;
+			*b = '/';
+			b++;
+		}
+		else {
+			*b = *a;
+			b++; a++;
+		}
+	}
+	if ((TrailingSlash) && (b > Dir) && (*(b - 1) != '/')){	/* b > Dir: never peek before the buffer when Dir is "" */
+		*b = '/';
+		b++;
+	}
+	*b = '\0';
 
-	if (buf==NULL) return;
-	if (IsEmptyStr(buf)) return;
-	len = strlen(buf);
-        while ((!IsEmptyStr(buf)) && (isspace(buf[len - 1])))
-                buf[--len] = 0;
-	if (IsEmptyStr(buf)) return;
-	a = 0;
-        while ((!IsEmptyStr(buf)) && (isspace(buf[a])))
-		a++;
-	if (a > 0)
-                memmove(buf, &buf[a], len - a + 1);
 }
 
+/*
+ * Strip leading and trailing whitespace from a string, in place.  Returns the new length (0 for NULL).
+ */
+size_t striplt(char *buf) {
+	char *first_nonspace = NULL;
+	char *last_nonspace = NULL;
+	char *ptr;
+	size_t new_len = 0;
+
+	if ((buf == NULL) || (*buf == '\0')) {
+		return 0;
+	}
+
+	for (ptr=buf; *ptr!=0; ++ptr) {
+		if (!isspace((unsigned char)*ptr)) {	/* a negative plain char is undefined behaviour in isspace() */
+			if (!first_nonspace) {
+				first_nonspace = ptr;
+			}
+			last_nonspace = ptr;
+		}
+	}

+	if ((!first_nonspace) || (!last_nonspace)) {	/* nothing but whitespace */
+		buf[0] = 0;
+		return 0;
+	}

+	new_len = last_nonspace - first_nonspace + 1;
+	memmove(buf, first_nonspace, new_len);	/* regions overlap, so memmove rather than memcpy */
+	buf[new_len] = 0;
+	return new_len;
+}
 
 
 /**
  * \brief check for the presence of a character within a string (returns count)
  * \param st the string to examine
  * \param ch the char to search
- * \return the position inside of st
+ * \return the number of times ch appears in st
  */
-int haschar(const char *st,int ch)
+int haschar(const char *st, int ch)
 {
 	const char *ptr;
 	int b;
@@ -513,40 +566,19 @@ int haschar(const char *st,int ch)
  */
 void fmt_date(char *buf, size_t n, time_t thetime, int seconds) {
 	struct tm tm;
-	int hour;
+	char *teh_format = NULL;
 
-	/* Month strings for date conversions ... this needs to be localized eventually */
-	char *fmt_date_months[12] = {
-		"Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
-	};
-
-	strcpy(buf, "");
+	*buf = '\0';
 	localtime_r(&thetime, &tm);
 
-	hour = tm.tm_hour;
-	if (hour == 0)	hour = 12;
-	else if (hour > 12) hour = hour - 12;
-
 	if (seconds) {
-		snprintf(buf, n, "%s %d %4d %d:%02d:%02d%s",
-			fmt_date_months[tm.tm_mon],
-			tm.tm_mday,
-			tm.tm_year + 1900,
-			hour,
-			tm.tm_min,
-			tm.tm_sec,
-			( (tm.tm_hour >= 12) ? "pm" : "am" )
-		);
-	} else {
-		snprintf(buf, n, "%s %d %4d %d:%02d%s",
-			fmt_date_months[tm.tm_mon],
-			tm.tm_mday,
-			tm.tm_year + 1900,
-			hour,
-			tm.tm_min,
-			( (tm.tm_hour >= 12) ? "pm" : "am" )
-		);
+		teh_format = "%F %R:%S";	/* YYYY-MM-DD HH:MM:SS */
 	}
+	else {
+		teh_format = "%F %R";	/* YYYY-MM-DD HH:MM */
+	}
+
+	if (strftime(buf, n, teh_format, &tm) == 0) *buf = '\0';	/* did not fit: buffer contents are unspecified */
 }
 
 
@@ -555,7 +587,7 @@ void fmt_date(char *buf, size_t n, time_t thetime, int seconds) {
  * Determine whether the specified message number is contained within the
  * specified sequence set.
  */
-int is_msg_in_sequence_set(char *mset, long msgnum) {
+int is_msg_in_sequence_set(const char *mset, long msgnum) {
 	int num_sets;
 	int s;
 	char setstr[128], lostr[128], histr[128];
@@ -642,13 +674,71 @@ char *memreadlinelen(char *start, char *buf, int maxlen, int *retlen)
 }
 
 
+/**
+ * \brief Copy one line from a const memory buffer into buf, leaving out CR and LF characters.
+ * \param start Location in memory from which we are reading; scanning stops at LF or NUL.
+ * \param buf the buffer to place the NUL-terminated line in.
+ * \param maxlen Size of buf; at most maxlen - 1 characters are stored, the rest are skipped.
+ * \return Pointer just past the LF or NUL that ended the line (so past the NUL at end of input).
+ */
+const char *cmemreadline(const char *start, char *buf, int maxlen)
+{
+	char ch;
+	const char *ptr;
+	int len = 0;		/**< tally our own length to avoid strlen() delays */
+
+	ptr = start;
+
+	while (1) {
+		ch = *ptr++;
+		if ((len + 1 < (maxlen)) && (ch != 13) && (ch != 10)) {
+			buf[len++] = ch;
+		}
+		if ((ch == 10) || (ch == 0)) {
+			buf[len] = 0;
+			return ptr;
+		}
+	}
+}
+
+
+/**
+ * \brief Copy one line from a const memory buffer into buf and report its length.
+ * \param start Location in memory from which we are reading; scanning stops at LF or NUL.
+ * \param buf the buffer to place the NUL-terminated line in (CR and LF are not stored).
+ * \param maxlen Size of buf; at most maxlen - 1 characters are stored, the rest are skipped.
+ * \param retlen receives the number of characters stored in buf, not counting the terminator
+ * \return Pointer just past the LF or NUL that ended the line (so past the NUL at end of input).
+ */
+const char *cmemreadlinelen(const char *start, char *buf, int maxlen, int *retlen)
+{
+	char ch;
+	const char *ptr;
+	int len = 0;		/**< tally our own length to avoid strlen() delays */
+
+	ptr = start;
+
+	while (1) {
+		ch = *ptr++;
+		if ((len + 1 < (maxlen)) && (ch != 13) && (ch != 10) && (ch != 0)) {	/* a stored NUL would inflate *retlen */
+			buf[len++] = ch;
+		}
+		if ((ch == 10) || (ch == 0)) {
+			buf[len] = 0;
+			*retlen = len;
+			return ptr;
+		}
+	}
+}
+
+
 
 
 /*
  * Strip a boundarized substring out of a string (for example, remove
  * parentheses and anything inside them).
  */
-void stripout(char *str, char leftboundary, char rightboundary) {
+int stripout(char *str, char leftboundary, char rightboundary) {
 	int a;
         int lb = (-1);
         int rb = (-1);
@@ -660,12 +750,14 @@ void stripout(char *str, char leftboundary, char rightboundary) {
 
         if ( (lb > 0) && (rb > lb) ) {
                 strcpy(&str[lb - 1], &str[rb + 1]);
+		return 1;
         }
 
         else if ( (lb == 0) && (rb > lb) ) {
                 strcpy(str, &str[rb + 1]);
+		return 1;
         }
-
+	return 0;
 }
 
 
@@ -673,19 +765,30 @@ void stripout(char *str, char leftboundary, char rightboundary) {
  * Reduce a string down to a boundarized substring (for example, remove
  * parentheses and anything outside them).
  */
-void stripallbut(char *str, char leftboundary, char rightboundary) {
-	int a;
-
-	for (a = 0; a < strlen(str); ++ a) {
-		if (str[a] == leftboundary) strcpy(str, &str[a+1]);
-	}
-
-	for (a = 0; a < strlen(str); ++ a) {
-		if (str[a] == rightboundary) str[a] = 0;
+long stripallbut(char *str, char leftboundary, char rightboundary) {	/* returns the resulting string length */
+	long len = 0;
+
+	char *lb = NULL;
+	char *rb = NULL;
+
+	lb = strrchr(str, leftboundary);	/* last opening boundary wins */
+	if (lb != NULL) {
+		++lb;
+		rb = strchr(lb, rightboundary);	/* only a closing boundary after the opening one counts */
+		if (rb != NULL) {
+			*rb = 0;
+			/* slide the enclosed text to the front of the buffer and terminate it */
+			len = (long)(rb - lb);
+			memmove(str, lb, len);
+			str[len] = 0;
+			return(len);
+		}
 	}
 
+	return (long)strlen(str);	/* no boundary pair found: string is left untouched */
 }
 
+
 char *myfgets(char *s, int size, FILE *stream) {
 	char *ret = fgets(s, size, stream);
 	char *nl;
@@ -711,7 +814,7 @@ void urlesc(char *outbuf, size_t oblen, char *strbuf)
 	int a, b, c, len, eclen, olen;
 	char *ec = " +#&;`'|*?-~<>^()[]{}/$\"\\";
 
-	strcpy(outbuf, "");
+	*outbuf = '\0';
 	len = strlen(strbuf);
 	eclen = strlen(ec);
 	olen = 0;
@@ -749,12 +852,41 @@ char *strcpy(char *dest, const char *src) {
  * Generate a new, globally unique UID parameter for a calendar etc. object
  */
 void generate_uuid(char *buf) {
-	static int seq = 0;
+	static int seq = (-1);
+	static int no_kernel_uuid = 0;
+
+	/* If we are running on Linux then we have a kernelspace uuid generator available (buf must hold 37 bytes) */
+
+	if (no_kernel_uuid == 0) {
+		FILE *fp;
+		fp = fopen("/proc/sys/kernel/random/uuid", "rb");
+		if (fp) {
+			int rv;
+			rv = fread(buf, 36, 1, fp);
+			fclose(fp);
+			if (rv == 1) {
+				buf[36] = 0;
+				return;
+			}
+		}
+	}
 
-	sprintf(buf, "%lx-%lx-%x",
-		time(NULL),
+	/* If the kernel didn't provide us with a uuid, we generate a pseudo-random one (fields masked to stay 36 chars) */
+
+	no_kernel_uuid = 1;
+
+	if (seq == (-1)) {
+		seq = (int)(rand() % 0x0FFF);	/* reduce first so ++seq cannot overflow at RAND_MAX */
+	}
+	++seq;
+	seq = (seq % 0x0FFF) ;
+
+	sprintf(buf, "%08lx-%04lx-4%03x-a%03x-%012lx",
+		(unsigned long)time(NULL) & 0xFFFFFFFFUL,
-		(long)getpid(),
-		(seq++)
+		(unsigned long)getpid() & 0xFFFFUL,
+		seq,
+		seq,
+		(long)rand()
 	);
 }
 
@@ -765,12 +897,65 @@ void generate_uuid(char *buf) {
  * The code is roughly based on the strstr() replacement from 'tin' written
  * by Urs Jannsen.
  */
-char *bmstrcasestr(char *text, char *pattern) {
+inline static char *_bmstrcasestr_len(char *text, size_t textlen, const char *pattern, size_t patlen) {
 
 	register unsigned char *p, *t;
 	register int i, j, *delta;
 	register size_t p1;
 	int deltaspace[256];
+
+	if (!text) return(NULL);
+	if (!pattern) return(NULL);
+
+	/* algorithm fails if pattern is empty */
+	if ((p1 = patlen) == 0)
+		return (text);
+
+	/* code below fails (whenever i is unsigned) if pattern too long */
+	if (p1 > textlen)
+		return (NULL);
+
+	/* set up deltas */
+	delta = deltaspace;
+	for (i = 0; i <= 255; i++)
+		delta[i] = p1;
+	for (p = (unsigned char *) pattern, i = p1; --i > 0;)
+		delta[tolower(*p++)] = i;
+
+	/*
+	 * From now on, we want patlen - 1.
+	 * In the loop below, p points to the end of the pattern,
+	 * t points to the end of the text to be tested against the
+	 * pattern, and i counts the amount of text remaining, not
+	 * including the part to be tested.
+	 */
+	p1--;
+	p = (unsigned char *) pattern + p1;
+	t = (unsigned char *) text + p1;
+	i = textlen - patlen;
+	while(1) {
+		if (tolower(p[0]) == tolower(t[0])) {
+			if (strncasecmp ((const char *)(p - p1), (const char *)(t - p1), p1) == 0) {
+				return ((char *)t - p1);
+			}
+		}
+		j = delta[tolower(t[0])];
+		if (i < j)
+			break;
+		i -= j;
+		t += j;
+	}
+	return (NULL);
+}
+
+/*
+ * bmstrcasestr() -- case-insensitive substring search
+ *
+ * This uses the Boyer-Moore search algorithm and is therefore quite fast.
+ * The code is roughly based on the strstr() replacement from 'tin' written
+ * by Urs Jannsen.
+ */
+char *bmstrcasestr(char *text, const char *pattern) {
 	size_t textlen;
 	size_t patlen;
 
@@ -780,6 +965,33 @@ char *bmstrcasestr(char *text, char *pattern) {
 	textlen = strlen (text);
 	patlen = strlen (pattern);
 
+	return _bmstrcasestr_len(text, textlen, pattern, patlen);
+}
+
+char *bmstrcasestr_len(char *text, size_t textlen, const char *pattern, size_t patlen) {
+	return _bmstrcasestr_len(text, textlen, pattern, patlen);
+}
+
+
+
+
+/*
+ * _cbmstrcasestr_len() -- case-insensitive substring search on const text with explicit lengths
+ *
+ * This uses the Boyer-Moore search algorithm and is therefore quite fast.
+ * The code is roughly based on the strstr() replacement from 'tin' written
+ * by Urs Jannsen.
+ */
+inline static const char *_cbmstrcasestr_len(const char *text, size_t textlen, const char *pattern, size_t patlen) {
+
+	register unsigned char *p, *t;
+	register int i, j, *delta;
+	register size_t p1;
+	int deltaspace[256];
+
+	if (!text) return(NULL);
+	if (!pattern) return(NULL);
+
 	/* algorithm fails if pattern is empty */
 	if ((p1 = patlen) == 0)
 		return (text);
@@ -821,7 +1033,29 @@ char *bmstrcasestr(char *text, char *pattern) {
 	return (NULL);
 }
 
+/*
+ * cbmstrcasestr() -- case-insensitive substring search (const-correct variant of bmstrcasestr)
+ *
+ * This uses the Boyer-Moore search algorithm and is therefore quite fast.
+ * The code is roughly based on the strstr() replacement from 'tin' written
+ * by Urs Jannsen.
+ */
+const char *cbmstrcasestr(const char *text, const char *pattern) {
+	size_t textlen;
+	size_t patlen;
+
+	if (!text) return(NULL);
+	if (!pattern) return(NULL);
+
+	textlen = strlen (text);
+	patlen = strlen (pattern);
 
+	return _cbmstrcasestr_len(text, textlen, pattern, patlen);
+}
+
+const char *cbmstrcasestr_len(const char *text, size_t textlen, const char *pattern, size_t patlen) {
+	return _cbmstrcasestr_len(text, textlen, pattern, patlen);
+}
 
 /*
  * Local replacement for controversial C library function that generates
@@ -847,7 +1081,7 @@ void CtdlMakeTempFileName(char *name, int len) {
  * Determine whether the specified message number is contained within the specified set.
  * Returns nonzero if the specified message number is in the specified message set string.
  */
-int is_msg_in_mset(char *mset, long msgnum) {
+int is_msg_in_mset(const char *mset, long msgnum) {
 	int num_sets;
 	int s;
 	char setstr[SIZ], lostr[SIZ], histr[SIZ];       /* was 1024 */
@@ -881,10 +1115,8 @@ int is_msg_in_mset(char *mset, long msgnum) {
 
 
 /*
- * \brief searches for a  paternn within asearch string
- * \param search the string to search 
- * \param patn the pattern to find in string
- * \returns position in string
+ * searches for a pattern within a search string
+ * returns position in string
  */
 int pattern2(char *search, char *patn)
 {
@@ -921,97 +1153,6 @@ void stripltlen(char *buf, int *len)
 	}
 }
 
-/**
- * \brief detect whether this char starts an utf-8 encoded char
- * \param Char character to inspect
- * \returns yes or no
- */
-inline int Ctdl_IsUtf8SequenceStart(char Char)
-{
-/** 11??.???? indicates an UTF8 Sequence. */
-	return ((Char & 0xC0) != 0);
-}
-
-/**
- * \brief evaluate the length of an utf8 special character sequence
- * \param Char the character to examine
- * \returns width of utf8 chars in bytes
- */
-inline int Ctdl_GetUtf8SequenceLength(char Char)
-{
-	int n = 1;
-        char test = (1<<7);
-	
-	while ((n < 8) && ((test & Char) != 0)) {
-		test = test << 1;
-		n ++;
-	}
-	if (n > 6)
-		n = 1;
-	return n;
-}
-
-/**
- * \brief measure the number of glyphs in an UTF8 string...
- * \param str string to measure
- * \returns the length of str
- */
-int Ctdl_Utf8StrLen(char *str)
-{
-	int n = 0;
-	int m = 0;
-	char *aptr;
-
-	if (str == NULL)
-		return n;
-	aptr = str;
-	while (*aptr != '\0') {
-		if (Ctdl_IsUtf8SequenceStart(*aptr)){
-			m = Ctdl_GetUtf8SequenceLength(*aptr);
-			while ((m-- > 0) && (*aptr++ != '\0'))
-				n ++;
-		}
-		else {
-			n++;
-			aptr++;
-		}
-			
-	}
-	return n;
-}
-
-/**
- * \brief cuts a string after maxlen glyphs
- * \param str string to cut to maxlen glyphs
- * \param maxlen how long may the string become?
- * \returns pointer to maxlen or the end of the string
- */
-char *Ctdl_Utf8StrCut(char *str, int maxlen)
-{
-	int n = 0, m = 0;
-	char *aptr;
-
-	if (str == NULL)
-		return NULL;
-	aptr = str;
-	while (*aptr != '\0') {
-		if (Ctdl_IsUtf8SequenceStart(*aptr)){
-			m = Ctdl_GetUtf8SequenceLength(*aptr);
-			while ((m-- > 0) && (*aptr++ != '\0'))
-				n ++;
-		}
-		else {
-			n++;
-			aptr++;
-		}
-		if (n > maxlen) {
-			*aptr = '\0';
-			return aptr;
-		}			
-	}
-	return aptr;
-}
-
 
 /*
  * Convert all whitespace characters in a supplied string to underscores
@@ -1032,59 +1173,20 @@ void convert_spaces_to_underscores(char *str)
 }
 
 
-/* 
- * Parse a URL into host, port number, and resource identifier.
- * (This is used by various functions which might need to fetch web pages.)
+/*
+ * check whether the provided string needs to be qp encoded or not
  */
-int parse_url(char *url, char *hostname, int *port, char *identifier)
+int CheckEncode(const char *pch, long len, const char *pche)
 {
-	char protocol[1024];
-	char scratch[1024];
-	char *ptr = NULL;
-	char *nptr = NULL;
-	
-	strcpy(scratch, url);
-	ptr = (char *)strchr(scratch, ':');
-	if (!ptr) {
-		return(1);	/* no protocol specified */
-	}
-
-	strcpy(ptr, "");
-	strcpy(protocol, scratch);
-	if (strcmp(protocol, "http")) {
-		return(2);	/* not HTTP */
-	}
-
-	strcpy(scratch, url);
-	ptr = (char *) strstr(scratch, "//");
-	if (!ptr) {
-		return(3);	/* no server specified */
-	}
-	ptr += 2;
-
-	strcpy(hostname, ptr);
-	nptr = (char *)strchr(ptr, ':');
-	if (!nptr) {
-		*port = 80;	/* default */
-		nptr = (char *)strchr(hostname, '/');
-	}
-	else {
-		sscanf(nptr, ":%d", port);
-		nptr = (char *)strchr(hostname, ':');
-	}
-
-	if (nptr) {
-		*nptr = '\0';
-	}
-
-	nptr = (char *)strchr(ptr, '/');
-	
-	if (!nptr) {
-		return(4);	/* no url specified */
+	if (pche == NULL)
+		pche = pch + len;
+	while (pch < pche) {
+		if (((unsigned char) *pch < 32) || 
+		    ((unsigned char) *pch > 126)) {
+			return 1;
+		}
+		pch++;
 	}
-	
-	strcpy(identifier, nptr);
-	return(0);
+	return 0;
 }
 
-