1 // A basic toolset containing miscellaneous functions for string manipulation,
2 // encoding/decoding, and a bunch of other stuff.
4 // Copyright (c) 1987-2022 by the citadel.org team
6 // This program is open source software; you can redistribute it and/or modify
7 // it under the terms of the GNU General Public License as published by
8 // the Free Software Foundation; either version 3 of the License, or
9 // (at your option) any later version.
11 // This program is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
16 // You should have received a copy of the GNU General Public License
17 // along with this program; if not, write to the Free Software
18 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 #include <sys/types.h>
32 #if TIME_WITH_SYS_TIME
33 # include <sys/time.h>
37 # include <sys/time.h>
43 #include "libcitadel.h"
49 typedef unsigned char byte; /* Byte type */
51 // Copy a string into a buffer of a known size. abort if we exceed the limits
53 // dest the target buffer
54 // src the source string
// n the size limit for the copy, passed as a size_t (presumably the capacity
//   of dest -- TODO confirm against callers)
57 // returns the number of characters copied if dest is big enough, -n if not.
58 int safestrncpy(char *dest, const char *src, size_t n) {
// A NULL dest or src is reported on stderr.
61 if (dest == NULL || src == NULL)
63 fprintf(stderr, "safestrncpy: NULL argument\n");
// Once the byte at index i of dest is the NUL terminator, i is returned.
69 if (dest[i] == 0) return i;
77 // num_tokens() - discover number of parameters/tokens in a string
// source is the NUL-terminated string to examine; tok is presumably the
// separator character between tokens (TODO confirm).
78 int num_tokens(const char *source, char tok) {
80 const char *ptr = source;
// Walk the string from its first byte up to the terminating NUL.
86 while (*ptr != '\0') {
96 // extract_token() - a string tokenizer
// dest      receives the extracted token
// source    the separator-delimited string to read from
// parmnum   index of the wanted token; current_token starts at 0, so
//           numbering is zero-based
// separator the delimiter character
// maxlen    upper bound that len is checked against while extracting
97 // returns -1 if not found, or length of token.
98 long extract_token(char *dest, const char *source, int parmnum, char separator, int maxlen) {
99 const char *s; // source
100 int len = 0; // running total length of extracted string
101 int current_token = 0; // token currently being processed
// A separator character marks a token boundary.
118 if (*s == separator) {
// Inside the wanted token, not on a separator, and still below maxlen.
121 if ( (current_token == parmnum) && (*s != separator) && (len < maxlen) ) {
// Either already past the wanted token, or the length limit has been reached.
125 else if ((current_token > parmnum) || (len >= maxlen)) {
// The token counter never reached parmnum (presumably the "not found"
// case that yields -1 -- TODO confirm).
132 if (current_token < parmnum) {
139 // remove_token() - a tokenizer that kills, maims, and destroys
// source is non-const and is presumably edited in place to drop token number
// parmnum (TODO confirm); separator is the delimiter character.
140 void remove_token(char *source, int parmnum, char separator) {
141 char *d, *s; // dest, source
144 /* Find desired parameter */
// count advances toward parmnum while d scans for separators.
146 while (count < parmnum) {
147 // End of string, bail!
152 if (*d == separator) {
157 if (!d) return; // Parameter not found
159 // Find next parameter
// Advance s to the next separator or to the end of the string.
161 while (*s && *s != separator) {
// d still equal to source means the located parameter is the first one.
168 else if (d == source)
175 // extract_int() - extract an int parm without supplying a buffer
// source is a '|'-separated string; parmnum is the token index handed to
// extract_token().
176 int extract_int(const char *source, int parmnum) {
// Only a positive result from extract_token() is accepted here, so both a
// missing token (-1) and an empty token (length 0) fail this test.
179 if (extract_token(buf, source, parmnum, '|', sizeof buf) > 0)
186 // extract_long() - extract a long parm without supplying a buffer
// source is a '|'-separated string; parmnum is the token index handed to
// extract_token().
187 long extract_long(const char *source, int parmnum) {
// Only a positive result from extract_token() is accepted here, so both a
// missing token (-1) and an empty token (length 0) fail this test.
190 if (extract_token(buf, source, parmnum, '|', sizeof buf) > 0)
197 // extract_unsigned_long() - extract an unsigned long parm
// source is a '|'-separated string; parmnum is the token index handed to
// extract_token().
198 unsigned long extract_unsigned_long(const char *source, int parmnum) {
// Only a positive result from extract_token() is accepted here, so both a
// missing token (-1) and an empty token (length 0) fail this test.
201 if (extract_token(buf, source, parmnum, '|', sizeof buf) > 0)
// Parse the token as base-10; no end pointer is requested, so trailing
// garbage and range errors are not detected here.
202 return strtoul(buf, NULL, 10);
208 // If we send out non-ASCII subjects, we encode them this way.
// line is the text to encode and length is the number of bytes of it handed
// to the base64 encoder.
209 char *rfc2047encode(const char *line, long length) {
210 const char *AlreadyEncoded;
// Prefix of a base64 ("B") encoded-word in UTF-8.
213 #define UTF8_HEADER "=?UTF-8?B?"
215 /* check if we're already done */
216 AlreadyEncoded = strstr(line, "=?");
// "=?" followed somewhere later by "?B?" or "?Q?" is taken to mean the line
// already contains an encoded-word.
217 if ((AlreadyEncoded != NULL) && ((strstr(AlreadyEncoded, "?B?") != NULL)|| (strstr(AlreadyEncoded, "?Q?") != NULL))) {
// sizeof(UTF8_HEADER) covers the header and its NUL; length * 2 leaves
// headroom over base64's 4/3 expansion.
// NOTE(review): the malloc() result is used on the next line without a NULL
// check.
221 result = (char*) malloc(sizeof(UTF8_HEADER) + 4 + length * 2);
// Copies exactly the header characters, without a terminating NUL; the
// encoder output is written directly after them.
222 strncpy (result, UTF8_HEADER, strlen (UTF8_HEADER));
223 CtdlEncodeBase64(result + strlen(UTF8_HEADER), line, length, 0);
// NOTE(review): strlen() here relies on CtdlEncodeBase64() NUL-terminating
// its output -- presumably it does; confirm against its definition.
224 end = strlen (result);
231 // removes double slashes from pathnames
232 // allows / disallows trailing slashes
// Dir is the path, edited in place (it is non-const); TrailingSlash is tested
// for non-zero below.
233 void StripSlashes(char *Dir, int TrailingSlash) {
// Scan until IsEmptyStr() reports the end of the string at a.
238 while (!IsEmptyStr(a)) {
// With TrailingSlash set, look at the byte just before b for a '/'.
// NOTE(review): *(b - 1) reads before the buffer if b can still equal the
// start of Dir at this point (e.g. empty input) -- TODO confirm.
250 if ((TrailingSlash) && (*(b - 1) != '/')){
259 // Strip leading and trailing spaces from a string
// buf is edited in place. The return type is size_t (presumably the new
// length -- TODO confirm).
260 size_t striplt(char *buf) {
261 char *first_nonspace = NULL;
262 char *last_nonspace = NULL;
// NULL and empty strings are handled up front.
266 if ((buf == NULL) || (*buf == '\0')) {
270 for (ptr=buf; *ptr!=0; ++ptr) {
// NOTE(review): if ptr is a plain char *, bytes >= 0x80 are negative where
// char is signed, and passing a negative value other than EOF to isspace()
// is undefined; a cast to unsigned char avoids that.
271 if (!isspace(*ptr)) {
// Remember only the first non-space character seen.
272 if (!first_nonspace) {
273 first_nonspace = ptr;
// No non-space character was found at all.
279 if ((!first_nonspace) || (!last_nonspace)) {
// Inclusive span from the first to the last non-space character.
284 new_len = last_nonspace - first_nonspace + 1;
// Source and destination overlap (both lie inside buf), hence memmove().
285 memmove(buf, first_nonspace, new_len);
292 * check for the presence of a character within a string (returns count)
293 * st the string to examine
294 * ch the char to search
295 * returns the number of times ch appears in st
// Takes the string st and the character ch as an int.
297 int haschar(const char *st, int ch)
// Loop until IsEmptyStr() reports the end of the string at ptr.
303 while (!IsEmptyStr(ptr))
314 * Determine whether the specified message number is contained within the
315 * specified sequence set.
// mset is a comma-separated list whose items are either a single value or a
// "lo:hi" pair; msgnum is the number being looked up. Returns 1 as soon as
// one item's inclusive range contains msgnum.
// NOTE(review): is_msg_in_mset() in this file performs the same parsing with
// SIZ-sized buffers; the two look like candidates for one shared helper.
317 int is_msg_in_sequence_set(const char *mset, long msgnum)
321 char setstr[128], lostr[128], histr[128];
// One iteration per comma-separated item.
324 num_sets = num_tokens(mset, ',');
325 for (s=0; s<num_sets; ++s) {
326 extract_token(setstr, mset, s, ',', sizeof setstr);
// The part before ':' is the lower bound.
328 extract_token(lostr, setstr, 0, ':', sizeof lostr);
// Two or more ':'-separated parts: an explicit upper bound is present.
329 if (num_tokens(setstr, ':') >= 2)
331 extract_token(histr, setstr, 1, ':', sizeof histr);
// "*" as upper bound is replaced by the decimal text of LONG_MAX.
332 if (!strcmp(histr, "*"))
334 snprintf(histr, sizeof histr, "%ld", LONG_MAX);
// Upper bound taken from the lower bound (presumably the single-value
// case -- TODO confirm).
339 strcpy(histr, lostr);
// Inclusive range test on both ends.
344 if ((msgnum >= lo) && (msgnum <= hi)) return(1);
351 * Utility function to "readline" from memory
352 * start Location in memory from which we are reading.
353 * buf the buffer to place the string in.
354 * maxlen Size of string buffer
355 * returns pointer to the source memory right after we stopped reading.
357 char *memreadline(char *start, char *buf, int maxlen)
361 int len = 0; /* tally our own length to avoid strlen() delays */
// Ordinary character: neither CR (13) nor LF (10), and len + 1 is still below
// maxlen (presumably keeping one byte free for the terminating NUL).
367 if ((len + 1 < (maxlen)) && (ch != 13) && (ch != 10)) {
// LF (10) or NUL (0): presumably the end of the line or of the input.
370 if ((ch == 10) || (ch == 0)) {
379 * Utility function to "readline" from memory
380 * start Location in memory from which we are reading.
381 * buf the buffer to place the string in.
382 * maxlen Size of string buffer
383 * retlen the length of the returned string
384 * returns a pointer to the source memory right after we stopped reading.
386 char *memreadlinelen(char *start, char *buf, int maxlen, int *retlen)
390 int len = 0; /* tally our own length to avoid strlen() delays */
// Ordinary character: neither CR (13) nor LF (10), and len + 1 is still below
// maxlen (presumably keeping one byte free for the terminating NUL).
397 if ((len + 1 < (maxlen)) && (ch != 13) && (ch != 10))
// LF (10) or NUL (0): presumably the end of the line or of the input.
401 if ((ch == 10) || (ch == 0))
412 * \brief Utility function to "readline" from memory
413 * \param start Location in memory from which we are reading.
414 * \param buf the buffer to place the string in.
415 * \param maxlen Size of string buffer
416 * \return Pointer to the source memory right after we stopped reading.
// Const-qualified variant: both start and the returned pointer are const char *.
418 const char *cmemreadline(const char *start, char *buf, int maxlen)
422 int len = 0; /**< tally our own length to avoid strlen() delays */
// Ordinary character: neither CR (13) nor LF (10), and len + 1 is still below
// maxlen (presumably keeping one byte free for the terminating NUL).
428 if ((len + 1 < (maxlen)) && (ch != 13) && (ch != 10)) {
// LF (10) or NUL (0): presumably the end of the line or of the input.
431 if ((ch == 10) || (ch == 0)) {
440 * \brief Utility function to "readline" from memory
441 * \param start Location in memory from which we are reading.
442 * \param buf the buffer to place the string in.
443 * \param maxlen Size of string buffer
444 * \param retlen the length of the returned string
445 * \return Pointer to the source memory right after we stopped reading.
// Const-qualified variant: both start and the returned pointer are const char *.
447 const char *cmemreadlinelen(const char *start, char *buf, int maxlen, int *retlen)
451 int len = 0; /**< tally our own length to avoid strlen() delays */
// Ordinary character: neither CR (13) nor LF (10), and len + 1 is still below
// maxlen (presumably keeping one byte free for the terminating NUL).
457 if ((len + 1 < (maxlen)) && (ch != 13) && (ch != 10)) {
// LF (10) or NUL (0): presumably the end of the line or of the input.
460 if ((ch == 10) || (ch == 0)) {
470 * Strip a boundarized substring out of a string (for example, remove
471 * parentheses and anything inside them).
// str is edited in place; leftboundary and rightboundary delimit the part to
// remove.
473 int stripout(char *str, char leftboundary, char rightboundary) {
// lb and rb are overwritten on every match, so they end up holding the LAST
// occurrence of each boundary character.
// NOTE(review): strlen(str) is re-evaluated on every iteration.
478 for (a = 0; a < strlen(str); ++a) {
479 if (str[a] == leftboundary) lb = a;
480 if (str[a] == rightboundary) rb = a;
// Left boundary is not the first character: the copy starts at lb - 1, so the
// character just before the left boundary is removed as well.
483 if ( (lb > 0) && (rb > lb) ) {
// NOTE(review): source and destination overlap, which standard strcpy() does
// not support. This file also contains a strcpy() built on memmove();
// whether that definition is the one in effect here should be confirmed.
484 strcpy(&str[lb - 1], &str[rb + 1]);
// Left boundary is the very first character: keep only what follows rb.
488 else if ( (lb == 0) && (rb > lb) ) {
489 strcpy(str, &str[rb + 1]);
497 * Reduce a string down to a boundarized substring (for example, remove
498 * parentheses and anything outside them).
// str is edited in place; the return value is the length of str afterwards.
500 long stripallbut(char *str, char leftboundary, char rightboundary) {
// Left boundary: LAST occurrence in the string.
506 lb = strrchr(str, leftboundary);
// Right boundary: FIRST occurrence in the string.
509 rb = strchr(str, rightboundary);
510 if ((rb != NULL) && (rb >= lb)) {
// NOTE(review): the distance is computed by casting both pointers to long;
// plain pointer subtraction (rb - lb) is the portable form, since long can
// be narrower than a pointer on some platforms.
513 len = (long)rb - (long)lb;
// Overlapping move of the kept span to the front of str.
514 memmove(str, lb, len);
520 return (long)strlen(str);
// Wrapper around fgets() with the same parameter list.
524 char *myfgets(char *s, int size, FILE *stream) {
525 char *ret = fgets(s, size, stream);
// Locate the newline that fgets() keeps in the buffer (presumably in order to
// strip it -- TODO confirm).
// NOTE(review): when fgets() returns NULL the contents of s may be
// unspecified; confirm ret is checked before this search.
529 nl = strchr(s, '\n');
539 * \brief Escape a string for feeding out as a URL.
540 * \param outbuf the output buffer
541 * \param oblen the size of outbuf to sanitize
542 * \param strbuf the input buffer
544 void urlesc(char *outbuf, size_t oblen, char *strbuf)
546 int a, b, c, len, eclen, olen;
// Characters that are compared against every input byte in the inner loop.
547 char *ec = " +#&;`'|*?-~<>^()[]{}/$\"\\";
550 len = strlen(strbuf);
// Outer loop: one pass per input character.
553 for (a = 0; a < len; ++a) {
// Inner loop: compare the input character with each entry of ec.
555 for (b = 0; b < eclen; ++b) {
556 if (strbuf[a] == ec[b])
// Escaped form: '%' followed by the byte in lowercase hex, written into the
// space remaining in outbuf.
// NOTE(review): strbuf[a] is a plain char passed to %x. Where char is signed,
// a byte >= 0x80 is promoted to a negative int and prints as eight hex
// digits; a cast to unsigned char would keep it at two. This only matters
// if the branch can be reached for such bytes -- TODO confirm.
560 snprintf(&outbuf[olen], oblen - olen, "%%%02x", strbuf[a]);
// Unescaped form: copy the byte through and advance the output index.
// NOTE(review): confirm olen is checked against oblen before this store.
564 outbuf[olen ++] = strbuf[a];
572 * In our world, we want strcpy() to be able to work with overlapping strings.
// NOTE(review): this defines a function with the C library's strcpy name and
// signature; confirm which preprocessor condition, if any, guards it.
577 char *strcpy(char *dest, const char *src) {
// memmove() is defined for overlapping regions; the + 1 copies the
// terminating NUL as well.
578 memmove(dest, src, (strlen(src) + 1) );
584 * Generate a new, globally unique UID parameter for a calendar etc. object
// buf receives the UUID text. Both code paths write 36 characters (see the
// notes below), so buf presumably needs room for at least 37 bytes including
// a terminator -- TODO confirm.
586 void generate_uuid(char *buf) {
// Static state persists across calls.
// NOTE(review): no synchronisation is visible around these statics.
587 static int seq = (-1);
588 static int no_kernel_uuid = 0;
590 /* If we are running on Linux then we have a kernelspace uuid generator available */
592 if (no_kernel_uuid == 0) {
594 fp = fopen("/proc/sys/kernel/random/uuid", "rb");
// Read one 36-byte item: the length of a textual UUID (32 hex digits plus 4
// hyphens). fread() does not NUL-terminate.
597 rv = fread(buf, 36, 1, fp);
606 /* If the kernel didn't provide us with a uuid, we generate a pseudo-random one */
// Reduce seq modulo 0x0FFF (4095) so a non-negative value fits the three hex
// digits printed by %03x.
614 seq = (seq % 0x0FFF) ;
// 8-4-4-4-12 hex layout; the literal '4' and 'a' occupy the first digit of
// the third and fourth groups.
// NOTE(review): sprintf() is unbounded, and the field widths are minimums
// only, so a wide argument would lengthen the output.
616 sprintf(buf, "%08lx-%04lx-4%03x-a%03x-%012lx",
626 * bmstrcasestr() -- case-insensitive substring search
628 * This uses the Boyer-Moore search algorithm and is therefore quite fast.
629 * The code is roughly based on the strstr() replacement from 'tin' written
// Case-insensitive search for pattern (patlen bytes) inside text (textlen
// bytes). Returns NULL if either pointer is NULL; a match is returned as a
// pointer into text.
632 inline static char *_bmstrcasestr_len(char *text, size_t textlen, const char *pattern, size_t patlen) {
634 register unsigned char *p, *t;
635 register int i, j, *delta;
639 if (!text) return(NULL);
640 if (!pattern) return(NULL);
642 /* algorithm fails if pattern is empty */
// p1 takes the pattern length; zero is handled as a special case.
643 if ((p1 = patlen) == 0)
646 /* code below fails (whenever i is unsigned) if pattern too long */
// delta is indexed by byte value; this loop visits all 256 entries.
652 for (i = 0; i <= 255; i++)
// Walk the pattern from its start while i counts down from p1 - 1 to 1;
// each lowercased pattern byte gets the current countdown value as its
// delta entry.
654 for (p = (unsigned char *) pattern, i = p1; --i > 0;)
655 delta[tolower(*p++)] = i;
658 * From now on, we want patlen - 1.
659 * In the loop below, p points to the end of the pattern,
660 * t points to the end of the text to be tested against the
661 * pattern, and i counts the amount of text remaining, not
662 * including the part to be tested.
665 p = (unsigned char *) pattern + p1;
666 t = (unsigned char *) text + p1;
667 i = textlen - patlen;
// Cheap test first: compare the characters at p and t, ignoring case.
669 if (tolower(p[0]) == tolower(t[0])) {
// Then compare p1 characters starting p1 positions back from p and t.
670 if (strncasecmp ((const char *)(p - p1), (const char *)(t - p1), p1) == 0) {
// Match: return the start of the compared window in text.
671 return ((char *)t - p1);
// Look up the delta entry for the text character currently at t.
674 j = delta[tolower(t[0])];
684 * bmstrcasestr() -- case-insensitive substring search
686 * This uses the Boyer-Moore search algorithm and is therefore quite fast.
687 * The code is roughly based on the strstr() replacement from 'tin' written
// Convenience front end for NUL-terminated strings: measures both with
// strlen() and delegates to _bmstrcasestr_len().
690 char *bmstrcasestr(char *text, const char *pattern) {
// NULL must be rejected here because strlen() is called on both below; the
// helper repeats the same checks.
694 if (!text) return(NULL);
695 if (!pattern) return(NULL);
697 textlen = strlen (text);
698 patlen = strlen (pattern);
700 return _bmstrcasestr_len(text, textlen, pattern, patlen);
// Variant for callers that already know both lengths; forwards its arguments
// unchanged to the static helper.
703 char *bmstrcasestr_len(char *text, size_t textlen, const char *pattern, size_t patlen) {
704 return _bmstrcasestr_len(text, textlen, pattern, patlen);
711 * _cbmstrcasestr_len() -- case-insensitive substring search (const text variant)
713 * This uses the Boyer-Moore search algorithm and is therefore quite fast.
714 * The code is roughly based on the strstr() replacement from 'tin' written
// Const-qualified counterpart of _bmstrcasestr_len(): text and the returned
// pointer are const char *. Returns NULL if either pointer is NULL; a match
// is returned as a pointer into text.
717 inline static const char *_cbmstrcasestr_len(const char *text, size_t textlen, const char *pattern, size_t patlen) {
// NOTE(review): p and t are non-const pointers, so the casts below drop the
// const qualifier from pattern and text. Pointers to const unsigned char
// would keep it.
719 register unsigned char *p, *t;
720 register int i, j, *delta;
724 if (!text) return(NULL);
725 if (!pattern) return(NULL);
727 /* algorithm fails if pattern is empty */
// p1 takes the pattern length; zero is handled as a special case.
728 if ((p1 = patlen) == 0)
731 /* code below fails (whenever i is unsigned) if pattern too long */
// delta is indexed by byte value; this loop visits all 256 entries.
737 for (i = 0; i <= 255; i++)
// Walk the pattern from its start while i counts down from p1 - 1 to 1;
// each lowercased pattern byte gets the current countdown value as its
// delta entry.
739 for (p = (unsigned char *) pattern, i = p1; --i > 0;)
740 delta[tolower(*p++)] = i;
743 * From now on, we want patlen - 1.
744 * In the loop below, p points to the end of the pattern,
745 * t points to the end of the text to be tested against the
746 * pattern, and i counts the amount of text remaining, not
747 * including the part to be tested.
750 p = (unsigned char *) pattern + p1;
751 t = (unsigned char *) text + p1;
752 i = textlen - patlen;
// Cheap test first: compare the characters at p and t, ignoring case.
754 if (tolower(p[0]) == tolower(t[0])) {
// Then compare p1 characters starting p1 positions back from p and t.
755 if (strncasecmp ((const char *)(p - p1), (const char *)(t - p1), p1) == 0) {
// Match: return the start of the compared window in text.
756 return ((char *)t - p1);
// Look up the delta entry for the text character currently at t.
759 j = delta[tolower(t[0])];
769 * cbmstrcasestr() -- case-insensitive substring search (const text variant)
771 * This uses the Boyer-Moore search algorithm and is therefore quite fast.
772 * The code is roughly based on the strstr() replacement from 'tin' written
// Convenience front end for NUL-terminated strings: measures both with
// strlen() and delegates to _cbmstrcasestr_len().
775 const char *cbmstrcasestr(const char *text, const char *pattern) {
// NULL must be rejected here because strlen() is called on both below; the
// helper repeats the same checks.
779 if (!text) return(NULL);
780 if (!pattern) return(NULL);
782 textlen = strlen (text);
783 patlen = strlen (pattern);
785 return _cbmstrcasestr_len(text, textlen, pattern, patlen);
// Variant for callers that already know both lengths; forwards its arguments
// unchanged to the static helper.
788 const char *cbmstrcasestr_len(const char *text, size_t textlen, const char *pattern, size_t patlen) {
789 return _cbmstrcasestr_len(text, textlen, pattern, patlen);
793 * Local replacement for controversial C library function that generates
794 * names for temporary files. Included to shut up compiler warnings.
// name is the output buffer and len its size as passed to snprintf().
796 void CtdlMakeTempFileName(char *name, int len) {
// i is incremented first and then compared, so the attempts stop once i
// reaches 100.
799 while (i++, i < 100) {
// Candidate path under /tmp with two zero-padded hex fields.
800 snprintf(name, len, "/tmp/ctdl.%04lx.%04x",
// access() returning 0 means a file with this name already exists.
// NOTE(review): checking for existence and creating the file later is
// inherently racy; mkstemp() creates the file atomically. Confirm how
// callers use the returned name.
804 if (!access(name, F_OK)) {
813 * Determine whether the specified message number is contained within the specified set.
814 * Returns nonzero if the specified message number is in the specified message set string.
// mset is a comma-separated list whose items are either a single value or a
// "lo:hi" pair; msgnum is the number being looked up. Returns 1 as soon as
// one item's inclusive range contains msgnum.
// NOTE(review): is_msg_in_sequence_set() in this file performs the same
// parsing with 128-byte buffers; the two look like candidates for one shared
// helper.
816 int is_msg_in_mset(const char *mset, long msgnum) {
819 char setstr[SIZ], lostr[SIZ], histr[SIZ]; /* was 1024 */
823 * Now set it for all specified messages.
// One iteration per comma-separated item.
825 num_sets = num_tokens(mset, ',');
826 for (s=0; s<num_sets; ++s) {
827 extract_token(setstr, mset, s, ',', sizeof setstr);
// The part before ':' is the lower bound.
829 extract_token(lostr, setstr, 0, ':', sizeof lostr);
// Two or more ':'-separated parts: an explicit upper bound is present.
830 if (num_tokens(setstr, ':') >= 2) {
831 extract_token(histr, setstr, 1, ':', sizeof histr);
// "*" as upper bound is replaced by the decimal text of LONG_MAX.
832 if (!strcmp(histr, "*")) {
833 snprintf(histr, sizeof histr, "%ld", LONG_MAX);
// Upper bound taken from the lower bound (presumably the single-value
// case -- TODO confirm).
837 strcpy(histr, lostr);
// Inclusive range test on both ends.
842 if ((msgnum >= lo) && (msgnum <= hi)) return(1);
850 * searches for a pattern within a search string
851 * returns position in string
// search is the string scanned; patn is the pattern looked for.
853 int pattern2(char *search, char *patn)
857 len = strlen (search);
858 plen = strlen (patn);
// Try every starting offset a in search.
859 for (a = 0; a < len; ++a) {
// Case-insensitive comparison of plen characters starting at offset a;
// zero means a match.
860 if (!strncasecmp(&search[a], patn, plen))
868 * Strip leading and trailing spaces from a string; with premeasured and adjusted length.
869 * buf - the string to modify
870 * len - length of the string.
// buf is edited in place and *len is updated through the pointer.
872 void stripltlen(char *buf, int *len)
// Nothing to do for an empty string.
875 if (*len == 0) return;
// Leading whitespace: delta indexes into buf while it stays below *len.
// NOTE(review): buf is a plain char *; where char is signed, a byte >= 0x80
// reaches isspace() as a negative value, which is undefined unless it is
// EOF. A cast to unsigned char avoids that (same for the loop further down).
876 while ((*len > delta) && (isspace(buf[delta]))){
// Overlapping move of the remainder to the front; the + 1 includes one byte
// beyond the counted characters (presumably the terminating NUL).
879 memmove (buf, &buf[delta], *len - delta + 1);
882 if (*len == 0) return;
// Trailing whitespace: each space at the end is overwritten with NUL and
// *len is decremented.
883 while (isspace(buf[(*len) - 1])){
884 buf[--(*len)] = '\0';
890 * Convert all whitespace characters in a supplied string to underscores
// str is edited in place.
892 void convert_spaces_to_underscores(char *str)
// Visit every character index below len.
900 for (i=0; i<len; ++i) {
// Any character classified by isspace() qualifies, not only ' '.
901 if (isspace(str[i])) {
909 * check whether the provided string needs to be qp encoded or not
911 int CheckEncode(const char *pch, long len, const char *pche)
916 if (((unsigned char) *pch < 32) ||
917 ((unsigned char) *pch > 126)) {