1 // A basic toolset containing miscellaneous functions for string manipulation,
2 // encoding/decoding, and a bunch of other stuff.
4 // Copyright (c) 1987-2023 by the citadel.org team
6 // This program is open source software. Use, duplication, or disclosure
7 // is subject to the terms of the GNU General Public License, version 3.
13 #include <sys/types.h>
20 #include "libcitadel.h"
25 typedef unsigned char byte; // Byte type
27 // copy a string into a buffer of a known size. abort if we exceed the limits
29 // dest the target buffer
30 // src the source string
33 // returns the number of characters copied if dest is big enough, -n if not.
34 int safestrncpy(char *dest, const char *src, size_t n) {
37 if (dest == NULL || src == NULL) {
38 fprintf(stderr, "safestrncpy: NULL argument\n");
44 if (dest[i] == 0) return i;
52 // num_tokens() - discover number of parameters/tokens in a string
53 int num_tokens(const char *source, char tok) {
55 const char *ptr = source;
61 while (*ptr != '\0') {
71 // extract_token() - a string tokenizer
72 // returns -1 if not found, or length of token.
73 long extract_token(char *dest, const char *source, int parmnum, char separator, int maxlen) {
74 const char *s; // source
75 int len = 0; // running total length of extracted string
76 int current_token = 0; // token currently being processed
93 if (*s == separator) {
96 if ( (current_token == parmnum) && (*s != separator) && (len < maxlen) ) {
100 else if ((current_token > parmnum) || (len >= maxlen)) {
107 if (current_token < parmnum) {
114 // remove_token() - a tokenizer that kills, maims, and destroys
115 void remove_token(char *source, int parmnum, char separator) {
116 char *d, *s; // dest, source
119 // Find desired parameter
121 while (count < parmnum) {
122 // End of string, bail!
127 if (*d == separator) {
132 if (!d) return; // Parameter not found
134 // Find next parameter
136 while (*s && *s != separator) {
143 else if (d == source)
150 // extract_int() - extract an int parm without supplying a buffer
151 int extract_int(const char *source, int parmnum) {
154 if (extract_token(buf, source, parmnum, '|', sizeof buf) > 0)
161 // extract_long() - extract a long parm without supplying a buffer
162 long extract_long(const char *source, int parmnum) {
165 if (extract_token(buf, source, parmnum, '|', sizeof buf) > 0)
172 // extract_unsigned_long() - extract an unsigned long parm
173 unsigned long extract_unsigned_long(const char *source, int parmnum) {
176 if (extract_token(buf, source, parmnum, '|', sizeof buf) > 0)
177 return strtoul(buf, NULL, 10);
183 // If we send out non-ASCII subjects, we encode them this way.
184 char *rfc2047encode(const char *line, long length) {
185 const char *AlreadyEncoded;
188 #define UTF8_HEADER "=?UTF-8?B?"
190 // check if we're already done
191 AlreadyEncoded = strstr(line, "=?");
192 if ((AlreadyEncoded != NULL) && ((strstr(AlreadyEncoded, "?B?") != NULL)|| (strstr(AlreadyEncoded, "?Q?") != NULL))) {
196 result = (char*) malloc(sizeof(UTF8_HEADER) + 4 + length * 2);
197 strncpy (result, UTF8_HEADER, strlen (UTF8_HEADER));
198 CtdlEncodeBase64(result + strlen(UTF8_HEADER), line, length, BASE64_NO_LINEBREAKS);
199 end = strlen (result);
206 // removes double slashes from pathnames
207 // allows / disallows trailing slashes
208 void StripSlashes(char *Dir, int TrailingSlash) {
213 while (!IsEmptyStr(a)) {
225 if ((TrailingSlash) && (*(b - 1) != '/')){
234 // Trim leading and trailing whitespace from a string
235 size_t string_trim(char *buf) {
236 char *first_nonspace = NULL;
237 char *last_nonspace = NULL;
241 if ((buf == NULL) || (*buf == '\0')) {
245 for (ptr=buf; *ptr!=0; ++ptr) {
246 if (!isspace(*ptr)) {
247 if (!first_nonspace) {
248 first_nonspace = ptr;
254 if ((!first_nonspace) || (!last_nonspace)) {
259 new_len = last_nonspace - first_nonspace + 1;
260 memmove(buf, first_nonspace, new_len);
266 // check for the presence of a character within a string (returns count)
267 // st the string to examine
268 // ch the char to search
269 // returns the number of times ch appears in st
270 int haschar(const char *st, int ch) {
275 while (!IsEmptyStr(ptr))
285 // Determine whether the specified message number is contained within the specified sequence set.
286 int is_msg_in_sequence_set(const char *mset, long msgnum) {
289 char setstr[128], lostr[128], histr[128];
292 num_sets = num_tokens(mset, ',');
293 for (s=0; s<num_sets; ++s) {
294 extract_token(setstr, mset, s, ',', sizeof setstr);
296 extract_token(lostr, setstr, 0, ':', sizeof lostr);
297 if (num_tokens(setstr, ':') >= 2) {
298 extract_token(histr, setstr, 1, ':', sizeof histr);
299 if (!strcmp(histr, "*")) {
300 snprintf(histr, sizeof histr, "%ld", LONG_MAX);
304 strcpy(histr, lostr);
309 if ((msgnum >= lo) && (msgnum <= hi)) return(1);
315 // Utility function to "readline" from memory
316 // start Location in memory from which we are reading.
317 // buf the buffer to place the string in.
318 // maxlen Size of string buffer
319 // returns pointer to the source memory right after we stopped reading.
320 char *memreadline(char *start, char *buf, int maxlen) {
323 int len = 0; // tally our own length to avoid strlen() delays
329 if ((len + 1 < (maxlen)) && (ch != 13) && (ch != 10)) {
332 if ((ch == 10) || (ch == 0)) {
340 // Utility function to "readline" from memory
341 // start Location in memory from which we are reading.
342 // buf the buffer to place the string in.
343 // maxlen Size of string buffer
344 // retlen the length of the returned string
345 // returns a pointer to the source memory right after we stopped reading.
346 char *memreadlinelen(char *start, char *buf, int maxlen, int *retlen) {
349 int len = 0; // tally our own length to avoid strlen() delays
355 if ((len + 1 < (maxlen)) && (ch != 13) && (ch != 10)) {
358 if ((ch == 10) || (ch == 0)) {
367 // Utility function to "readline" from memory
368 // start Location in memory from which we are reading.
369 // buf the buffer to place the string in.
370 // maxlen Size of string buffer
371 // return Pointer to the source memory right after we stopped reading.
372 const char *cmemreadline(const char *start, char *buf, int maxlen) {
375 int len = 0; // tally our own length to avoid strlen() delays
381 if ((len + 1 < (maxlen)) && (ch != 13) && (ch != 10)) {
384 if ((ch == 10) || (ch == 0)) {
392 // Utility function to "readline" from memory
393 // start Location in memory from which we are reading.
394 // buf the buffer to place the string in.
395 // maxlen Size of string buffer
396 // retlen the length of the returned string
397 // return Pointer to the source memory right after we stopped reading.
398 const char *cmemreadlinelen(const char *start, char *buf, int maxlen, int *retlen) {
401 int len = 0; // tally our own length to avoid strlen() delays
407 if ((len + 1 < (maxlen)) && (ch != 13) && (ch != 10)) {
410 if ((ch == 10) || (ch == 0)) {
419 // Strip a boundarized substring out of a string (for example, remove parentheses and anything inside them).
420 int stripout(char *str, char leftboundary, char rightboundary) {
428 for (int a = 0; str[a]; ++a) {
429 if ((lb==-1) && (str[a] == leftboundary)) {
431 } else if (str[a] == rightboundary) {
436 if ((lb==-1) || (rb <= lb)) {
440 strcpy(str + lb, str + rb + 1);
444 // Reduce a string down to a boundarized substring (for example, remove
445 // parentheses and anything outside them).
446 long stripallbut(char *str, char leftboundary, char rightboundary) {
455 while (str[orig_len]) {
456 if ((lb==-1) && (str[orig_len] == leftboundary)) {
458 } else if (str[orig_len] == rightboundary) {
464 if ((lb==-1) || (rb <= lb)) {
470 long new_len = rb - lb - 1;
471 memmove(str, str + lb + 1, new_len);
476 char *myfgets(char *s, int size, FILE *stream) {
477 char *ret = fgets(s, size, stream);
481 nl = strchr(s, '\n');
491 // Escape a string for feeding out as a URL.
492 // outbuf the output buffer
493 // oblen the size of outbuf to sanitize
494 // strbuf the input buffer
495 void urlesc(char *outbuf, size_t oblen, char *strbuf) {
496 int a, b, c, len, eclen, olen;
497 char *ec = " +#&;`'|*?-~<>^()[]{}/$\"\\";
500 len = strlen(strbuf);
503 for (a = 0; a < len; ++a) {
505 for (b = 0; b < eclen; ++b) {
506 if (strbuf[a] == ec[b])
510 snprintf(&outbuf[olen], oblen - olen, "%%%02x", strbuf[a]);
514 outbuf[olen ++] = strbuf[a];
520 // In our world, we want strcpy() to be able to work with overlapping strings.
524 char *strcpy(char *dest, const char *src) {
525 memmove(dest, src, (strlen(src) + 1) );
530 // Generate a new, globally unique UID parameter for a calendar etc. object
531 void generate_uuid(char *buf) {
532 static int seq = (-1);
533 static int no_kernel_uuid = 0;
535 // If we are running on Linux then we have a kernelspace uuid generator available
537 if (no_kernel_uuid == 0) {
539 fp = fopen("/proc/sys/kernel/random/uuid", "rb");
542 rv = fread(buf, 36, 1, fp);
551 // If the kernel didn't provide us with a uuid, we generate a pseudo-random one
559 seq = (seq % 0x0FFF) ;
561 sprintf(buf, "%08lx-%04lx-4%03x-a%03x-%012lx",
571 // bmstrcasestr() -- case-insensitive substring search
573 // This uses the Boyer-Moore search algorithm and is therefore quite fast.
574 // The code is roughly based on the strstr() replacement from 'tin' written
576 inline static char *_bmstrcasestr_len(char *text, size_t textlen, const char *pattern, size_t patlen) {
578 register unsigned char *p, *t;
579 register int i, j, *delta;
583 if (!text) return(NULL);
584 if (!pattern) return(NULL);
586 // algorithm fails if pattern is empty
587 if ((p1 = patlen) == 0)
590 // code below fails (whenever i is unsigned) if pattern too long
596 for (i = 0; i <= 255; i++)
598 for (p = (unsigned char *) pattern, i = p1; --i > 0;)
599 delta[tolower(*p++)] = i;
601 // From now on, we want patlen - 1.
602 // In the loop below, p points to the end of the pattern,
603 // t points to the end of the text to be tested against the
604 // pattern, and i counts the amount of text remaining, not
605 // including the part to be tested.
607 p = (unsigned char *) pattern + p1;
608 t = (unsigned char *) text + p1;
609 i = textlen - patlen;
611 if (tolower(p[0]) == tolower(t[0])) {
612 if (strncasecmp ((const char *)(p - p1), (const char *)(t - p1), p1) == 0) {
613 return ((char *)t - p1);
616 j = delta[tolower(t[0])];
627 * bmstrcasestr() -- case-insensitive substring search
629 * This uses the Boyer-Moore search algorithm and is therefore quite fast.
630 * The code is roughly based on the strstr() replacement from 'tin' written
633 char *bmstrcasestr(char *text, const char *pattern) {
637 if (!text) return(NULL);
638 if (!pattern) return(NULL);
640 textlen = strlen (text);
641 patlen = strlen (pattern);
643 return _bmstrcasestr_len(text, textlen, pattern, patlen);
646 char *bmstrcasestr_len(char *text, size_t textlen, const char *pattern, size_t patlen) {
647 return _bmstrcasestr_len(text, textlen, pattern, patlen);
652 * cbmstrcasestr() -- case-insensitive substring search (const-qualified variant)
654 * This uses the Boyer-Moore search algorithm and is therefore quite fast.
655 * The code is roughly based on the strstr() replacement from 'tin' written
658 inline static const char *_cbmstrcasestr_len(const char *text, size_t textlen, const char *pattern, size_t patlen) {
660 register unsigned char *p, *t;
661 register int i, j, *delta;
665 if (!text) return(NULL);
666 if (!pattern) return(NULL);
668 /* algorithm fails if pattern is empty */
669 if ((p1 = patlen) == 0)
672 /* code below fails (whenever i is unsigned) if pattern too long */
678 for (i = 0; i <= 255; i++)
680 for (p = (unsigned char *) pattern, i = p1; --i > 0;)
681 delta[tolower(*p++)] = i;
684 * From now on, we want patlen - 1.
685 * In the loop below, p points to the end of the pattern,
686 * t points to the end of the text to be tested against the
687 * pattern, and i counts the amount of text remaining, not
688 * including the part to be tested.
691 p = (unsigned char *) pattern + p1;
692 t = (unsigned char *) text + p1;
693 i = textlen - patlen;
695 if (tolower(p[0]) == tolower(t[0])) {
696 if (strncasecmp ((const char *)(p - p1), (const char *)(t - p1), p1) == 0) {
697 return ((char *)t - p1);
700 j = delta[tolower(t[0])];
711 * cbmstrcasestr() -- case-insensitive substring search (const-qualified variant)
713 * This uses the Boyer-Moore search algorithm and is therefore quite fast.
714 * The code is roughly based on the strstr() replacement from 'tin' written
717 const char *cbmstrcasestr(const char *text, const char *pattern) {
721 if (!text) return(NULL);
722 if (!pattern) return(NULL);
724 textlen = strlen (text);
725 patlen = strlen (pattern);
727 return _cbmstrcasestr_len(text, textlen, pattern, patlen);
731 const char *cbmstrcasestr_len(const char *text, size_t textlen, const char *pattern, size_t patlen) {
732 return _cbmstrcasestr_len(text, textlen, pattern, patlen);
737 * Local replacement for controversial C library function that generates
738 * names for temporary files. Included to shut up compiler warnings.
740 void CtdlMakeTempFileName(char *name, int len) {
743 while (i++, i < 100) {
744 snprintf(name, len, "/tmp/ctdl.%04lx.%04x",
748 if (!access(name, F_OK)) {
756 * Determine whether the specified message number is contained within the specified set.
757 * Returns nonzero if the specified message number is in the specified message set string.
759 int is_msg_in_mset(const char *mset, long msgnum) {
762 char setstr[SIZ], lostr[SIZ], histr[SIZ];
765 // Check each comma-separated set in turn to see whether it contains msgnum.
766 num_sets = num_tokens(mset, ',');
767 for (s=0; s<num_sets; ++s) {
768 extract_token(setstr, mset, s, ',', sizeof setstr);
770 extract_token(lostr, setstr, 0, ':', sizeof lostr);
771 if (num_tokens(setstr, ':') >= 2) {
772 extract_token(histr, setstr, 1, ':', sizeof histr);
773 if (!strcmp(histr, "*")) {
774 snprintf(histr, sizeof histr, "%ld", LONG_MAX);
778 strcpy(histr, lostr);
783 if ((msgnum >= lo) && (msgnum <= hi)) return(1);
790 // searches for a pattern within a search string
791 // returns position in string
792 int pattern2(char *search, char *patn) {
795 len = strlen (search);
796 plen = strlen (patn);
797 for (a = 0; a < len; ++a) {
798 if (!strncasecmp(&search[a], patn, plen))
806 * Convert all whitespace characters in a supplied string to underscores
808 void convert_spaces_to_underscores(char *str) {
815 for (i=0; i<len; ++i) {
816 if (isspace(str[i])) {
824 * check whether the provided string needs to be qp encoded or not
826 int CheckEncode(const char *pch, long len, const char *pche) {
830 if (((unsigned char) *pch < 32) ||
831 ((unsigned char) *pch > 126)) {