2 * A basic toolset containing miscellaneous functions for string manipulation,
3 * encoding/decoding, and a bunch of other stuff.
5 * Copyright (c) 1987-2011 by the citadel.org team
7 * This program is open source software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 3 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
27 #include <sys/types.h>
33 #include "b64/cencode.h"
34 #include "b64/cdecode.h"
36 #if TIME_WITH_SYS_TIME
37 # include <sys/time.h>
41 # include <sys/time.h>
47 #include "libcitadel.h"
53 typedef unsigned char byte; /* Byte type */
/*
 * Copy a string into a buffer of a known size, always NUL-terminating it.
 *
 * dest	the target buffer
 * src	the source string
 * n	the size of dest in bytes
 *
 * Returns the number of characters copied (terminator excluded) if dest is
 * big enough.  Otherwise dest receives the first n-1 characters of src and
 * -n is returned.  A NULL argument aborts the process.  When n is 0 nothing
 * is written and 0 is returned.
 */
int safestrncpy(char *dest, const char *src, size_t n)
{
	size_t i;

	if (dest == NULL || src == NULL) {
		fprintf(stderr, "safestrncpy: NULL argument\n");
		abort();
	}

	/* A zero-sized buffer cannot even hold the terminator. */
	if (n == 0) {
		return 0;
	}

	for (i = 0; i < n; ++i) {
		dest[i] = src[i];
		if (dest[i] == 0) return (int)i;
	}

	/* src did not fit: cut it off and report the failure as -n */
	dest[n - 1] = 0;
	return -(int)n;
}
/*
 * num_tokens() - discover number of parameters/tokens in a string
 *
 * source	the string to examine (may be NULL)
 * tok		the separator character
 *
 * Returns the number of separators plus one, so an empty string still holds
 * one (empty) token.  Returns 0 only when source is NULL.
 */
int num_tokens(const char *source, char tok)
{
	int count = 1;
	const char *ptr = source;

	if (source == NULL) {
		return 0;
	}

	while (*ptr != '\0') {
		if (*ptr++ == tok) {
			++count;
		}
	}

	return count;
}
105 //extern void cit_backtrace(void);
/*
 * extract_token() - a string tokenizer
 *
 * dest		buffer receiving the NUL-terminated token
 * source	the separator-delimited string to read from
 * parmnum	zero-based index of the token wanted
 * separator	the delimiter character
 * maxlen	size of dest in bytes; a longer token is truncated to fit
 *
 * Returns the length of the extracted token, or -1 if the token was not
 * found, if dest or source is NULL, or if maxlen leaves no room for the
 * terminator (in which case dest is not touched).
 *
 * Only one implementation is kept; the alternative body that used to follow
 * had the same signature and could not coexist with this one.
 */
long extract_token(char *dest, const char *source, int parmnum, char separator, int maxlen)
{
	const char *s;			/* read position in source */
	int len = 0;			/* running total length of extracted string */
	int current_token = 0;		/* token currently being processed */

	if ((dest == NULL) || (maxlen < 1)) {
		return -1;
	}
	dest[0] = 0;

	if (source == NULL) {
		return -1;
	}

	maxlen--;			/* keep one byte for the terminator */

	for (s = source; *s != '\0'; ++s) {
		if (*s == separator) {
			++current_token;
		}
		if ((current_token == parmnum) && (*s != separator) && (len < maxlen)) {
			dest[len++] = *s;
		}
		else if ((current_token > parmnum) || (len >= maxlen)) {
			/* past the wanted token, or dest is full */
			break;
		}
	}

	dest[len] = '\0';
	if (current_token < parmnum) {
		return -1;
	}
	return len;
}
/*
 * remove_token() - a tokenizer that kills, maims, and destroys
 *
 * Deletes token number parmnum (zero-based) from the separator-delimited
 * string source, in place, together with one adjoining separator.  The
 * string is left untouched when it has no such token.
 */
void remove_token(char *source, int parmnum, char separator)
{
	char *d, *s;		/* dest, source */
	int count = 0;

	/* Find desired parameter */
	d = source;
	while (count < parmnum) {
		/* End of string, bail! */
		if (!*d) {
			d = NULL;
			break;
		}
		if (*d == separator) {
			count++;
		}
		d++;
	}
	if (!d) return;		/* Parameter not found */

	/* Find next parameter */
	s = d;
	while (*s && *s != separator) {
		s++;
	}

	if (*s) {
		/* Close the gap.  The regions overlap, so memmove() is required. */
		++s;
		memmove(d, s, strlen(s) + 1);
	}
	else if (d == source) {
		/* The only token: the string becomes empty */
		*d = 0;
	}
	else {
		/* The last token: also drop the separator in front of it */
		*--d = 0;
	}
}
/*
 * extract_int() - extract an int parm w/o supplying a buffer
 *
 * Reads field number parmnum (zero-based) of the '|'-delimited string source
 * and converts it from decimal.  Returns 0 when the field is missing or empty.
 */
int extract_int(const char *source, int parmnum)
{
	char buf[32];

	if (extract_token(buf, source, parmnum, '|', sizeof buf) > 0) {
		return (int)strtol(buf, NULL, 10);
	}
	return 0;
}
/*
 * extract_long() - extract a long parm w/o supplying a buffer
 *
 * Reads field number parmnum (zero-based) of the '|'-delimited string source
 * and converts it from decimal.  Returns 0 when the field is missing or empty.
 */
long extract_long(const char *source, int parmnum)
{
	char buf[32];

	if (extract_token(buf, source, parmnum, '|', sizeof buf) > 0) {
		return strtol(buf, NULL, 10);
	}
	return 0;
}
/*
 * extract_unsigned_long() - extract an unsigned long parm
 *
 * Reads field number parmnum (zero-based) of the '|'-delimited string source
 * and converts it from decimal.  Returns 0 when the field is missing or empty.
 */
unsigned long extract_unsigned_long(const char *source, int parmnum)
{
	char buf[32];

	if (extract_token(buf, source, parmnum, '|', sizeof buf) > 0) {
		return strtoul(buf, NULL, 10);
	}
	return 0;
}
/*
 * Base64-encode sourcelen bytes from source into dest and NUL-terminate the
 * result.  Two paths are visible below: one that feeds the encoder in
 * fixed-size slices and writes CRLF after each, and one that encodes the
 * whole input in a single call.  Presumably the linebreaks argument selects
 * between them; the branch itself is not visible in this excerpt.
 */
289 size_t CtdlEncodeBase64(char *dest, const char *source, size_t sourcelen, int linebreaks)
291 // linebreaks at 70 are ugly for base64, since 3 bytes in makes 4 bytes out
292 int breaklength = 68;
/* 3 * 68 / 4 = 51 source bytes, which encode to exactly 68 output characters */
293 int readlength = 3 * breaklength / 4;
300 base64_encodestate _state;
302 base64_init_encodestate(&_state);
305 sourceremaining = sourcelen;
/* sliced path: one encoder call per output line */
309 while (sourceremaining > 0) {
310 destoffset += base64_encode_block(
311 &(source[sourceoffset]),
/* never hand the encoder more than what is left of the input */
312 (readlength > sourceremaining ? sourceremaining : readlength),
/*
 * Both counters advance by a full readlength even after a short final slice.
 * NOTE(review): the loop then only ends if sourceremaining is a signed type;
 * its declaration is not visible here - confirm.
 */
315 sourceoffset += readlength;
316 sourceremaining -= readlength;
317 dest[destoffset++] = '\r';
318 dest[destoffset++] = '\n';
/* let the encoder emit whatever it still holds back (presumably the padding) */
322 destoffset += base64_encode_blockend(&(dest[destoffset]), &_state);
/*
 * t is presumably destoffset as sampled before the call above, so this CRLF
 * is written only when that call produced output - TODO confirm.
 */
323 if (t < destoffset) {
324 dest[destoffset++] = '\r';
325 dest[destoffset++] = '\n';
/* single-call path: encode the whole input at once, no CRLF inserted */
329 destoffset = base64_encode_block(source, sourcelen, dest, &_state);
331 destoffset += base64_encode_blockend(&(dest[destoffset]), &_state);
/* terminate so that dest can be handled as a C string */
333 dest[destoffset] = 0;
339 * Convert base64-encoded to binary. Returns the length of the decoded data.
340 * It will stop after reading 'length' bytes.
/*
 * Decode base64 text from source into dest with a fresh decoder state.
 * At most length input bytes are handed to the decoder.
 */
342 int CtdlDecodeBase64(char *dest, const char *source, size_t length)
344 base64_decodestate _state;
347 base64_init_decodestate(&_state);
/* len receives the decoder's return value (presumably the decoded byte count) */
349 len = base64_decode_block(source, length, dest, &_state);
/*
 * If we send out non-ASCII subjects, we encode them this way: as a single
 * RFC 2047 encoded-word, "=?UTF-8?B?" + base64(line) + "?=".
 *
 * line		the header text to encode (NUL-terminated)
 * length	number of bytes of line to encode
 *
 * A line that already contains an encoded-word ("=?" followed somewhere by
 * "?B?" or "?Q?") is returned as an unmodified copy.
 *
 * Returns a newly malloc()ed string that the caller must free(), or NULL
 * when memory cannot be allocated.
 */
char *rfc2047encode(const char *line, long length)
{
	const char *AlreadyEncoded;
	char *result;
	size_t end;
#define UTF8_HEADER "=?UTF-8?B?"
	const size_t header_len = sizeof(UTF8_HEADER) - 1;

	/* check if we're already done */
	AlreadyEncoded = strstr(line, "=?");
	if ((AlreadyEncoded != NULL) &&
	    ((strstr(AlreadyEncoded, "?B?") != NULL) ||
	     (strstr(AlreadyEncoded, "?Q?") != NULL)))
	{
		size_t copy_len = strlen(line) + 1;

		result = malloc(copy_len);
		if (result != NULL) {
			memcpy(result, line, copy_len);
		}
		return result;
	}

	/* base64 needs 4 output bytes per 3 input bytes; 2 * length is ample */
	result = malloc(sizeof(UTF8_HEADER) + 4 + length * 2);
	if (result == NULL) {
		return NULL;
	}

	/* The encoder appends right behind the prefix and terminates the string */
	memcpy(result, UTF8_HEADER, header_len);
	CtdlEncodeBase64(result + header_len, line, length, 0);

	end = strlen(result);
	result[end] = '?';
	result[end + 1] = '=';
	result[end + 2] = '\0';
	return result;
}
/*
 * Collapse every run of slashes in a pathname into a single slash, in place.
 *
 * Dir			the pathname to clean up
 * TrailingSlash	nonzero: make sure a non-empty result ends in '/'
 *			(Dir must have room for one extra byte);
 *			zero: nothing is appended, an existing trailing
 *			slash is kept as it is
 *
 * An empty Dir stays empty.
 */
void StripSlashes(char *Dir, int TrailingSlash)
{
	char *a, *b;		/* read and write positions */

	a = b = Dir;

	while (*a != '\0') {
		if (*a == '/') {
			while (*a == '/') {
				a++;
			}
			*b++ = '/';
		}
		else {
			*b++ = *a++;
		}
	}

	/* Only look at the previous byte when one has actually been written */
	if ((TrailingSlash) && (b > Dir) && (*(b - 1) != '/')) {
		*b++ = '/';
	}
	*b = '\0';
}
/*
 * Strip leading and trailing spaces from a string, in place.
 *
 * buf	the string to trim (may be NULL)
 *
 * Returns the length of the trimmed string; 0 for a NULL, empty or
 * all-whitespace input.
 */
size_t striplt(char *buf) {
	char *first_nonspace = NULL;
	char *last_nonspace = NULL;
	char *ptr;
	size_t new_len;

	if ((buf == NULL) || (*buf == '\0')) {
		return 0;
	}

	for (ptr = buf; *ptr != 0; ++ptr) {
		/* <ctype.h> functions need an unsigned char value */
		if (!isspace((unsigned char)*ptr)) {
			if (!first_nonspace) {
				first_nonspace = ptr;
			}
			last_nonspace = ptr;
		}
	}

	/* Nothing but whitespace */
	if ((!first_nonspace) || (!last_nonspace)) {
		buf[0] = 0;
		return 0;
	}

	new_len = (size_t)(last_nonspace - first_nonspace) + 1;
	memmove(buf, first_nonspace, new_len);
	buf[new_len] = 0;
	return new_len;
}
/**
 * \brief check for the presence of a character within a string (returns count)
 * \param st the string to examine (NULL is treated as empty)
 * \param ch the char to search
 * \return the number of times ch appears in st
 */
int haschar(const char *st, int ch)
{
	const char *ptr;
	int b = 0;

	if (st == NULL) {
		return 0;
	}

	for (ptr = st; *ptr != '\0'; ++ptr) {
		if (*ptr == ch) {
			++b;
		}
	}
	return b;
}
474 * Format a date/time stamp for output
475 * seconds is whether to print the seconds
/*
 * Write a textual form of thetime into buf (capacity n bytes).  Two layouts
 * are visible below; presumably the seconds argument chooses between them
 * (the branch itself is not visible in this excerpt).
 */
477 void fmt_date(char *buf, size_t n, time_t thetime, int seconds) {
479 char *teh_format = NULL;
/* break the timestamp down into local-time fields (reentrant call) */
482 localtime_r(&thetime, &tm);
/* %F is year-month-day, %R is hour:minute; this layout adds ":seconds" */
485 teh_format = "%F %R:%S";
/* same layout without the seconds */
488 teh_format = "%F %R";
/* strftime() stores at most n bytes, terminator included */
491 strftime(buf, n, teh_format, &tm);
497 * Determine whether the specified message number is contained within the
498 * specified sequence set.
/*
 * Walk the comma-separated entries of mset.  Each entry is either a single
 * number or a "lo:hi" pair; returns 1 as soon as msgnum lies inside one.
 */
500 int is_msg_in_sequence_set(const char *mset, long msgnum) {
503 char setstr[128], lostr[128], histr[128];
/* one entry per comma-separated field */
506 num_sets = num_tokens(mset, ',');
507 for (s=0; s<num_sets; ++s) {
508 extract_token(setstr, mset, s, ',', sizeof setstr);
/* the text before the colon (or the whole entry) is the lower bound */
510 extract_token(lostr, setstr, 0, ':', sizeof lostr);
511 if (num_tokens(setstr, ':') >= 2) {
512 extract_token(histr, setstr, 1, ':', sizeof histr);
/* "*" as upper bound means open-ended: substitute the largest long */
513 if (!strcmp(histr, "*")) {
514 snprintf(histr, sizeof histr, "%ld", LONG_MAX);
/* a single number: the upper bound text is the same as the lower one */
518 strcpy(histr, lostr);
/* both ends inclusive; lo and hi are presumably parsed from lostr / histr */
523 if ((msgnum >= lo) && (msgnum <= hi)) return(1);
/**
 * \brief Utility function to "readline" from memory
 * \param start Location in memory from which we are reading.
 * \param buf the buffer to place the string in.
 * \param maxlen Size of string buffer
 * \return Pointer to the source memory right after we stopped reading.
 *
 * Reading stops after a linefeed or after the terminating NUL of the source;
 * the returned pointer is one past that byte.  Carriage returns are dropped
 * and a line longer than maxlen - 1 characters is truncated.
 */
char *memreadline(char *start, char *buf, int maxlen)
{
	char *ptr = start;
	char ch;
	int len = 0;		/**< tally our own length to avoid strlen() delays */

	for (;;) {
		ch = *ptr++;
		/* keep payload only: no CR, no LF, and not the source terminator */
		if ((ch != 0) && (ch != 13) && (ch != 10) && (len + 1 < maxlen)) {
			buf[len++] = ch;
		}
		if ((ch == 10) || (ch == 0)) {
			if (maxlen > 0) {
				buf[len] = 0;
			}
			return ptr;
		}
	}
}
/**
 * \brief Utility function to "readline" from memory
 * \param start Location in memory from which we are reading.
 * \param buf the buffer to place the string in.
 * \param maxlen Size of string buffer
 * \param retlen the length of the returned string
 * \return Pointer to the source memory right after we stopped reading.
 *
 * Reading stops after a linefeed or after the terminating NUL of the source;
 * the returned pointer is one past that byte.  Carriage returns are dropped
 * and a line longer than maxlen - 1 characters is truncated.  The source
 * terminator is never counted in *retlen, so *retlen == strlen(buf).
 */
char *memreadlinelen(char *start, char *buf, int maxlen, int *retlen)
{
	char *ptr = start;
	char ch;
	int len = 0;		/**< tally our own length to avoid strlen() delays */

	for (;;) {
		ch = *ptr++;
		/* keep payload only: no CR, no LF, and not the source terminator */
		if ((ch != 0) && (ch != 13) && (ch != 10) && (len + 1 < maxlen)) {
			buf[len++] = ch;
		}
		if ((ch == 10) || (ch == 0)) {
			if (maxlen > 0) {
				buf[len] = 0;
			}
			*retlen = len;
			return ptr;
		}
	}
}
/**
 * \brief Utility function to "readline" from memory (const source)
 * \param start Location in memory from which we are reading.
 * \param buf the buffer to place the string in.
 * \param maxlen Size of string buffer
 * \return Pointer to the source memory right after we stopped reading.
 *
 * Reading stops after a linefeed or after the terminating NUL of the source;
 * the returned pointer is one past that byte.  Carriage returns are dropped
 * and a line longer than maxlen - 1 characters is truncated.
 */
const char *cmemreadline(const char *start, char *buf, int maxlen)
{
	const char *ptr = start;
	char ch;
	int len = 0;		/**< tally our own length to avoid strlen() delays */

	for (;;) {
		ch = *ptr++;
		/* keep payload only: no CR, no LF, and not the source terminator */
		if ((ch != 0) && (ch != 13) && (ch != 10) && (len + 1 < maxlen)) {
			buf[len++] = ch;
		}
		if ((ch == 10) || (ch == 0)) {
			if (maxlen > 0) {
				buf[len] = 0;
			}
			return ptr;
		}
	}
}
/**
 * \brief Utility function to "readline" from memory (const source)
 * \param start Location in memory from which we are reading.
 * \param buf the buffer to place the string in.
 * \param maxlen Size of string buffer
 * \param retlen the length of the returned string
 * \return Pointer to the source memory right after we stopped reading.
 *
 * Reading stops after a linefeed or after the terminating NUL of the source;
 * the returned pointer is one past that byte.  Carriage returns are dropped
 * and a line longer than maxlen - 1 characters is truncated.  The source
 * terminator is never counted in *retlen, so *retlen == strlen(buf).
 */
const char *cmemreadlinelen(const char *start, char *buf, int maxlen, int *retlen)
{
	const char *ptr = start;
	char ch;
	int len = 0;		/**< tally our own length to avoid strlen() delays */

	for (;;) {
		ch = *ptr++;
		/* keep payload only: no CR, no LF, and not the source terminator */
		if ((ch != 0) && (ch != 13) && (ch != 10) && (len + 1 < maxlen)) {
			buf[len++] = ch;
		}
		if ((ch == 10) || (ch == 0)) {
			if (maxlen > 0) {
				buf[len] = 0;
			}
			*retlen = len;
			return ptr;
		}
	}
}
/*
 * Strip a boundarized substring out of a string (for example, remove
 * parentheses and anything inside them).
 *
 * The span runs from the last leftboundary to the last rightboundary.  When
 * the left boundary is not the first character, the character just before it
 * (typically a space) is removed as well.
 *
 * Returns 1 if something was removed, 0 if no such span exists.
 */
int stripout(char *str, char leftboundary, char rightboundary) {
	size_t len = strlen(str);	/* measured once, not on every iteration */
	size_t a;
	long lb = -1;
	long rb = -1;
	long cut;

	for (a = 0; a < len; ++a) {
		if (str[a] == leftboundary) lb = (long)a;
		if (str[a] == rightboundary) rb = (long)a;
	}

	if ((lb < 0) || (rb <= lb)) {
		return 0;
	}

	/* Also swallow the character in front of the left boundary, if any */
	cut = (lb > 0) ? lb - 1 : 0;

	/* Overlapping regions: memmove(), terminator included */
	memmove(&str[cut], &str[rb + 1], len - (size_t)rb);
	return 1;
}
/*
 * Reduce a string down to a boundarized substring (for example, remove
 * parentheses and anything outside them).
 *
 * The substring starts right after the last leftboundary and ends right
 * before the first rightboundary of the string.  If either boundary is
 * missing, or the first rightboundary lies before that start, the string is
 * left unchanged.
 *
 * Returns the length of the resulting string.
 */
long stripallbut(char *str, char leftboundary, char rightboundary) {
	char *lb;
	char *rb;
	long len;

	lb = strrchr(str, leftboundary);
	if (lb != NULL) {
		++lb;
		rb = strchr(str, rightboundary);
		if ((rb != NULL) && (rb >= lb)) {
			len = (long)(rb - lb);
			memmove(str, lb, (size_t)len);
			str[len] = 0;
			return len;
		}
	}

	return (long)strlen(str);
}
/*
 * fgets() wrapper that cuts the line at its newline.
 *
 * Returns what fgets() returned: s on success, NULL at end of file or on a
 * read error (s is then left as fgets() left it).
 */
char *myfgets(char *s, int size, FILE *stream) {
	char *ret = fgets(s, size, stream);
	char *nl;

	if (ret != NULL) {
		nl = strchr(s, '\n');
		if (nl != NULL) {
			*nl = 0;
		}
	}

	return ret;
}
/**
 * \brief Escape a string for feeding out as a URL.
 * \param outbuf the output buffer
 * \param oblen the size of outbuf
 * \param strbuf the input buffer (NULL is treated as empty)
 *
 * Every character of the escape set below is written as %xx (lower-case
 * hex); everything else is copied through unchanged.  The output is always
 * NUL-terminated and never exceeds oblen bytes: when the next character (or
 * its three-byte escape) no longer fits, the output stops there.
 */
void urlesc(char *outbuf, size_t oblen, char *strbuf)
{
	static const char ec[] = " +#&;`'|*?-~<>^()[]{}/$\"\\";
	const char *in;
	size_t olen = 0;

	if ((outbuf == NULL) || (oblen == 0)) {
		return;
	}
	*outbuf = '\0';
	if (strbuf == NULL) {
		return;
	}

	for (in = strbuf; *in != '\0'; ++in) {
		if (strchr(ec, *in) != NULL) {
			/* three bytes of escape plus the terminator must fit */
			if (olen + 3 >= oblen) {
				break;
			}
			snprintf(&outbuf[olen], oblen - olen, "%%%02x", (unsigned char)*in);
			olen += 3;
		}
		else {
			if (olen + 1 >= oblen) {
				break;
			}
			outbuf[olen++] = *in;
		}
	}
	outbuf[olen] = '\0';
}
750 * In our world, we want strcpy() to be able to work with overlapping strings.
/*
 * Replacement for the C library strcpy(): the copy goes through memmove(),
 * which is defined for overlapping source and destination.
 */
755 char *strcpy(char *dest, const char *src) {
/* strlen(src) + 1 bytes, so the terminating NUL is copied as well */
756 memmove(dest, src, (strlen(src) + 1) );
762 * Generate a new, globally unique UID parameter for a calendar etc. object
/*
 * Write a UUID in textual form into buf.  The format string used on the
 * fallback path produces at least 36 characters plus the terminator, so buf
 * must provide at least 37 bytes.
 */
764 void generate_uuid(char *buf) {
/* both values are static, so they persist from one call to the next */
765 static int seq = (-1);
766 static int no_kernel_uuid = 0;
768 /* If we are running on Linux then we have a kernelspace uuid generator available */
/* presumably set nonzero once the kernel source turned out to be unusable */
770 if (no_kernel_uuid == 0) {
772 fp = fopen("/proc/sys/kernel/random/uuid", "rb");
/* one item of 36 bytes: the length of a textual UUID */
775 rv = fread(buf, 36, 1, fp);
784 /* If the kernel didn't provide us with a uuid, we generate a pseudo-random one */
/* reduce the counter modulo 0x0FFF (it feeds a three-hex-digit field below) */
792 seq = (seq % 0x0FFF) ;
/* 8-4-4-4-12 hex layout; third group starts with a literal '4', fourth with 'a' */
794 sprintf(buf, "%08lx-%04lx-4%03x-a%03x-%012lx",
804 * bmstrcasestr() -- case-insensitive substring search
806 * This uses the Boyer-Moore search algorithm and is therefore quite fast.
807 * The code is roughly based on the strstr() replacement from 'tin' written
/*
 * Case-insensitive search for pattern (patlen bytes) inside text (textlen
 * bytes) using a 256-entry skip table.  On a hit the start of the matching
 * span inside text is returned; NULL is returned for a NULL argument.
 */
810 inline static char *_bmstrcasestr_len(char *text, size_t textlen, const char *pattern, size_t patlen) {
812 register unsigned char *p, *t;
813 register int i, j, *delta;
817 if (!text) return(NULL);
818 if (!pattern) return(NULL);
820 /* algorithm fails if pattern is empty */
821 if ((p1 = patlen) == 0)
824 /* code below fails (whenever i is unsigned) if pattern too long */
/* give every one of the 256 table slots its default (assignment not shown here) */
830 for (i = 0; i <= 255; i++)
/*
 * For every pattern byte except the last one, store its distance from the
 * end of the pattern, indexed by the lower-cased byte.
 */
832 for (p = (unsigned char *) pattern, i = p1; --i > 0;)
833 delta[tolower(*p++)] = i;
836 * From now on, we want patlen - 1.
837 * In the loop below, p points to the end of the pattern,
838 * t points to the end of the text to be tested against the
839 * pattern, and i counts the amount of text remaining, not
840 * including the part to be tested.
/* p1 has presumably been decremented by now, as announced just above */
843 p = (unsigned char *) pattern + p1;
844 t = (unsigned char *) text + p1;
845 i = textlen - patlen;
/* cheap test on the last byte first ... */
847 if (tolower(p[0]) == tolower(t[0])) {
/* ... then compare the p1 bytes in front of it */
848 if (strncasecmp ((const char *)(p - p1), (const char *)(t - p1), p1) == 0) {
/* start of the matching span */
849 return ((char *)t - p1);
/* shift distance is looked up by the text byte under the pattern's last position */
852 j = delta[tolower(t[0])];
/*
 * bmstrcasestr() -- case-insensitive substring search
 *
 * This uses the Boyer-Moore search algorithm and is therefore quite fast.
 * The code is roughly based on the strstr() replacement from 'tin'.
 *
 * text and pattern are NUL-terminated strings.  Returns what
 * _bmstrcasestr_len() returns for them, or NULL if either argument is NULL.
 */
char *bmstrcasestr(char *text, const char *pattern) {
	size_t textlen;
	size_t patlen;

	if (!text) return(NULL);
	if (!pattern) return(NULL);

	textlen = strlen (text);
	patlen = strlen (pattern);

	return _bmstrcasestr_len(text, textlen, pattern, patlen);
}
/*
 * Same search as bmstrcasestr() for callers that already know both lengths;
 * forwards directly to _bmstrcasestr_len().
 */
char *bmstrcasestr_len(char *text, size_t textlen, const char *pattern, size_t patlen) {
	return _bmstrcasestr_len(text, textlen, pattern, patlen);
}
889 * cbmstrcasestr() -- case-insensitive substring search (const-qualified variant)
891 * This uses the Boyer-Moore search algorithm and is therefore quite fast.
892 * The code is roughly based on the strstr() replacement from 'tin' written
/*
 * Const-qualified twin of the search above: case-insensitive search for
 * pattern (patlen bytes) inside text (textlen bytes) using a 256-entry skip
 * table.  On a hit the start of the matching span inside text is returned;
 * NULL is returned for a NULL argument.
 */
895 inline static const char *_cbmstrcasestr_len(const char *text, size_t textlen, const char *pattern, size_t patlen) {
897 register unsigned char *p, *t;
898 register int i, j, *delta;
902 if (!text) return(NULL);
903 if (!pattern) return(NULL);
905 /* algorithm fails if pattern is empty */
906 if ((p1 = patlen) == 0)
909 /* code below fails (whenever i is unsigned) if pattern too long */
/* give every one of the 256 table slots its default (assignment not shown here) */
915 for (i = 0; i <= 255; i++)
/*
 * For every pattern byte except the last one, store its distance from the
 * end of the pattern, indexed by the lower-cased byte.
 */
917 for (p = (unsigned char *) pattern, i = p1; --i > 0;)
918 delta[tolower(*p++)] = i;
921 * From now on, we want patlen - 1.
922 * In the loop below, p points to the end of the pattern,
923 * t points to the end of the text to be tested against the
924 * pattern, and i counts the amount of text remaining, not
925 * including the part to be tested.
/* p1 has presumably been decremented by now, as announced just above */
928 p = (unsigned char *) pattern + p1;
929 t = (unsigned char *) text + p1;
930 i = textlen - patlen;
/* cheap test on the last byte first ... */
932 if (tolower(p[0]) == tolower(t[0])) {
/* ... then compare the p1 bytes in front of it */
933 if (strncasecmp ((const char *)(p - p1), (const char *)(t - p1), p1) == 0) {
/* start of the matching span */
934 return ((char *)t - p1);
/* shift distance is looked up by the text byte under the pattern's last position */
937 j = delta[tolower(t[0])];
/*
 * cbmstrcasestr() -- case-insensitive substring search, const-qualified
 *
 * This uses the Boyer-Moore search algorithm and is therefore quite fast.
 * The code is roughly based on the strstr() replacement from 'tin'.
 *
 * text and pattern are NUL-terminated strings.  Returns what
 * _cbmstrcasestr_len() returns for them, or NULL if either argument is NULL.
 */
const char *cbmstrcasestr(const char *text, const char *pattern) {
	size_t textlen;
	size_t patlen;

	if (!text) return(NULL);
	if (!pattern) return(NULL);

	textlen = strlen (text);
	patlen = strlen (pattern);

	return _cbmstrcasestr_len(text, textlen, pattern, patlen);
}
/*
 * Same search as cbmstrcasestr() for callers that already know both lengths;
 * forwards directly to _cbmstrcasestr_len().
 */
const char *cbmstrcasestr_len(const char *text, size_t textlen, const char *pattern, size_t patlen) {
	return _cbmstrcasestr_len(text, textlen, pattern, patlen);
}
/*
 * Local replacement for controversial C library function that generates
 * names for temporary files.  Included to shut up compiler warnings.
 *
 * name	buffer receiving the file name
 * len	size of name in bytes
 *
 * Candidates look like /tmp/ctdl.<pid>.<counter>; the first of up to 99
 * candidates that does not exist yet is left in name.  If all of them
 * exist, name holds the last candidate tried.
 *
 * NOTE: the name is only checked, not created, so another process can still
 * grab it before the caller opens it.
 */
void CtdlMakeTempFileName(char *name, int len) {
	int i;

	if ((name == NULL) || (len <= 0)) {
		return;
	}

	for (i = 1; i < 100; ++i) {
		snprintf(name, (size_t)len, "/tmp/ctdl.%04lx.%04x",
			(unsigned long)getpid(),
			(unsigned int)i
		);
		/* access() fails when the path does not exist: that name is free */
		if (access(name, F_OK) != 0) {
			return;
		}
	}
}
991 * Determine whether the specified message number is contained within the specified set.
992 * Returns nonzero if the specified message number is in the specified message set string.
/*
 * Walk the comma-separated entries of mset.  Each entry is either a single
 * number or a "lo:hi" pair; returns 1 as soon as msgnum lies inside one.
 */
994 int is_msg_in_mset(const char *mset, long msgnum) {
997 char setstr[SIZ], lostr[SIZ], histr[SIZ]; /* was 1024 */
1001 * Now set it for all specified messages.
/* one entry per comma-separated field */
1003 num_sets = num_tokens(mset, ',');
1004 for (s=0; s<num_sets; ++s) {
1005 extract_token(setstr, mset, s, ',', sizeof setstr);
/* the text before the colon (or the whole entry) is the lower bound */
1007 extract_token(lostr, setstr, 0, ':', sizeof lostr);
1008 if (num_tokens(setstr, ':') >= 2) {
1009 extract_token(histr, setstr, 1, ':', sizeof histr);
/* "*" as upper bound means open-ended: substitute the largest long */
1010 if (!strcmp(histr, "*")) {
1011 snprintf(histr, sizeof histr, "%ld", LONG_MAX);
/* a single number: the upper bound text is the same as the lower one */
1015 strcpy(histr, lostr);
/* both ends inclusive; lo and hi are presumably parsed from lostr / histr */
1020 if ((msgnum >= lo) && (msgnum <= hi)) return(1);
/*
 * Searches for a pattern within a search string, ignoring case.
 *
 * search	the string to look in
 * patn		the pattern to look for
 *
 * Returns the position (zero-based) of the first match in search, or -1 if
 * there is none.  An empty pattern matches at position 0 of any non-empty
 * search string; an empty search string never matches.
 */
int pattern2(char *search, char *patn)
{
	int a;
	int len, plen;
	int last;

	len = (int)strlen (search);
	plen = (int)strlen (patn);

	/* A pattern longer than the text cannot match anywhere */
	if (plen > len) {
		return -1;
	}

	/* Last start position at which the whole pattern still fits */
	last = (plen > 0) ? (len - plen) : (len - 1);

	for (a = 0; a <= last; ++a) {
		if (!strncasecmp(&search[a], patn, plen)) {
			return a;
		}
	}
	return -1;
}
/*
 * Strip leading and trailing spaces from a string; with premeasured and
 * adjusted length.
 *
 * buf - the string to modify (NUL-terminated)
 * len - in: length of the string; out: length after trimming
 */
void stripltlen(char *buf, int *len)
{
	int delta = 0;

	if (*len == 0) return;

	/* Count the leading whitespace, then slide the rest (and the NUL) down */
	while ((*len > delta) && (isspace((unsigned char)buf[delta]))) {
		delta++;
	}
	memmove (buf, &buf[delta], *len - delta + 1);
	(*len) -= delta;

	if (*len == 0) return;

	/* Chop trailing whitespace off the end */
	while ((*len > 0) && (isspace((unsigned char)buf[(*len) - 1]))) {
		buf[--(*len)] = '\0';
	}
}
/*
 * Convert all whitespace characters in a supplied string to underscores.
 *
 * str - the string to modify in place; NULL is ignored
 */
void convert_spaces_to_underscores(char *str)
{
	char *p;

	if (!str) return;

	for (p = str; *p != '\0'; ++p) {
		/* <ctype.h> functions need an unsigned char value */
		if (isspace((unsigned char)*p)) {
			*p = '_';
		}
	}
}
1087 * check whether the provided string needs to be qp encoded or not
1089 int CheckEncode(const char *pch, long len, const char *pche)
1093 while (pch < pche) {
1094 if (((unsigned char) *pch < 32) ||
1095 ((unsigned char) *pch > 126)) {