1 // A basic toolset containing miscellaneous functions for string manipulation,
2 // encoding/decoding, and a bunch of other stuff.
4 // Copyright (c) 1987-2023 by the citadel.org team
6 // This program is open source software. Use, duplication, or disclosure
7 // is subject to the terms of the GNU General Public License, version 3.
13 #include <sys/types.h>
20 #include "libcitadel.h"
25 typedef unsigned char byte; // Byte type
27 // Copy a string into a buffer of a known size; abort if we exceed the limits.
29 // dest  the target buffer
30 // src   the source string
// n     not described in the lines visible here; presumably the capacity of dest -- confirm
33 // Returns the number of characters copied if dest is big enough, -n if not.
34 int safestrncpy(char *dest, const char *src, size_t n) {
// NULL pointers are detected up front and reported on stderr.
37 if (dest == NULL || src == NULL)
39 fprintf(stderr, "safestrncpy: NULL argument\n");
// Success path: once the byte at dest[i] is the NUL terminator, i is the copied length.
45 if (dest[i] == 0) return i;
53 // num_tokens() - discover the number of parameters/tokens in a string
// source  the string to scan
// tok     the separator character (callers in this file pass ',' and ':')
54 int num_tokens(const char *source, char tok) {
56 const char *ptr = source;
// Walk the string one character at a time until the NUL terminator.
62 while (*ptr != '\0') {
72 // extract_token() - a string tokenizer
// dest       buffer that receives the extracted token
// source     the string to tokenize
// parmnum    index of the wanted token; counting starts at 0 (current_token starts at 0)
// separator  the character that delimits tokens
// maxlen     limit on the extracted length; callers in this file pass sizeof of the dest buffer
73 // Returns -1 if not found, or the length of the token.
74 long extract_token(char *dest, const char *source, int parmnum, char separator, int maxlen) {
75 const char *s; // source
76 int len = 0; // running total length of extracted string
77 int current_token = 0; // token currently being processed
// A separator marks the boundary between two tokens.
94 if (*s == separator) {
// Only non-separator characters of the requested token are taken, and only while len is below maxlen.
97 if ( (current_token == parmnum) && (*s != separator) && (len < maxlen) ) {
// Either we are already past the requested token, or the length limit has been reached.
101 else if ((current_token > parmnum) || (len >= maxlen)) {
// The requested token index was never reached; presumably the "not found" (-1) case -- confirm.
108 if (current_token < parmnum) {
115 // remove_token() - a tokenizer that kills, maims, and destroys
// source     the string to operate on; it is non-const and the function returns void,
//            so the result is presumably produced in place -- confirm
// parmnum    index of the token to remove
// separator  the character that delimits tokens
116 void remove_token(char *source, int parmnum, char separator) {
117 char *d, *s; // dest, source
120 // Find desired parameter
// Advance until parmnum tokens have been counted.
122 while (count < parmnum) {
123 // End of string, bail!
128 if (*d == separator) {
133 if (!d) return; // Parameter not found
135 // Find next parameter
// Move s forward to the next separator or to the end of the string, whichever comes first.
137 while (*s && *s != separator) {
// Special handling when the token being removed starts at the very beginning of the string.
144 else if (d == source)
151 // extract_int() - extract an int parm without supplying a buffer
// source   a '|'-separated parameter string
// parmnum  index of the parameter to extract
152 int extract_int(const char *source, int parmnum) {
// The token is pulled into the local buffer buf; the conversion that follows only
// runs when extract_token() reports a positive token length.
155 if (extract_token(buf, source, parmnum, '|', sizeof buf) > 0)
162 // extract_long() - extract a long parm without supplying a buffer
// source   a '|'-separated parameter string
// parmnum  index of the parameter to extract
163 long extract_long(const char *source, int parmnum) {
// The token is pulled into the local buffer buf; the conversion that follows only
// runs when extract_token() reports a positive token length.
166 if (extract_token(buf, source, parmnum, '|', sizeof buf) > 0)
173 // extract_unsigned_long() - extract an unsigned long parm
// source   a '|'-separated parameter string
// parmnum  index of the parameter to extract
174 unsigned long extract_unsigned_long(const char *source, int parmnum) {
// Only a token with positive length is converted.
177 if (extract_token(buf, source, parmnum, '|', sizeof buf) > 0)
// The token is parsed as base 10; no end pointer is requested, so trailing garbage is not detected.
178 return strtoul(buf, NULL, 10);
184 // If we send out non-ASCII subjects, we encode them this way.
// line    the header text to encode
// length  number of bytes of line handed to the base64 encoder
// The encoding path returns a buffer obtained from malloc(); the caller presumably owns
// it and must free it -- confirm against the callers.
185 char *rfc2047encode(const char *line, long length) {
186 const char *AlreadyEncoded;
// Prefix of the encoded word: UTF-8 charset, base64 ("B") transfer encoding.
189 #define UTF8_HEADER "=?UTF-8?B?"
191 // check if we're already done
// A line that already contains "=?" followed by "?B?" or "?Q?" is treated as already encoded.
192 AlreadyEncoded = strstr(line, "=?");
193 if ((AlreadyEncoded != NULL) && ((strstr(AlreadyEncoded, "?B?") != NULL)|| (strstr(AlreadyEncoded, "?Q?") != NULL))) {
// Room for the prefix (sizeof includes its NUL), 4 spare bytes, and twice the input length.
// NOTE(review): the malloc() result is used on the very next line without a NULL check.
197 result = (char*) malloc(sizeof(UTF8_HEADER) + 4 + length * 2);
// Copies the prefix WITHOUT a NUL terminator (count == strlen of the prefix); the encoder
// output is written directly behind it.
198 strncpy (result, UTF8_HEADER, strlen (UTF8_HEADER));
199 CtdlEncodeBase64(result + strlen(UTF8_HEADER), line, length, BASE64_NO_LINEBREAKS);
// strlen() here relies on CtdlEncodeBase64() having NUL-terminated its output -- presumably it does; confirm.
200 end = strlen (result);
207 // Removes double slashes from pathnames.
208 // Allows / disallows trailing slashes.
// Dir            the pathname; non-const, presumably rewritten in place -- confirm
// TrailingSlash  nonzero when the result should end with a '/'
209 void StripSlashes(char *Dir, int TrailingSlash) {
// Process the path until the read cursor a reaches the end of the string.
214 while (!IsEmptyStr(a)) {
// A trailing slash is wanted and the character just before the write cursor b is not one.
226 if ((TrailingSlash) && (*(b - 1) != '/')){
235 // Trim leading and trailing whitespace from a string
// buf  the string to trim; modified in place
// The return type is size_t; presumably the length after trimming -- confirm.
236 size_t string_trim(char *buf) {
237 char *first_nonspace = NULL;
238 char *last_nonspace = NULL;
// NULL and empty strings are handled separately.
242 if ((buf == NULL) || (*buf == '\0')) {
// Single pass over the string, remembering where the non-whitespace content starts.
246 for (ptr=buf; *ptr!=0; ++ptr) {
// NOTE(review): *ptr is a plain char; isspace() requires a value representable as
// unsigned char (or EOF), so bytes >= 0x80 are a problem where char is signed.
247 if (!isspace(*ptr)) {
248 if (!first_nonspace) {
249 first_nonspace = ptr;
// No non-whitespace character was seen at all.
255 if ((!first_nonspace) || (!last_nonspace)) {
// Length of the span from the first to the last non-whitespace character, inclusive.
260 new_len = last_nonspace - first_nonspace + 1;
// memmove() because source and destination overlap within buf.
261 memmove(buf, first_nonspace, new_len);
267 // Check for the presence of a character within a string (returns count)
268 // st  the string to examine
269 // ch  the char to search for
270 // Returns the number of times ch appears in st.
271 int haschar(const char *st, int ch) {
// Scan until the end of the string.
276 while (!IsEmptyStr(ptr))
286 // Determine whether the specified message number is contained within the specified sequence set.
// mset    comma-separated list of entries, each either a single number or "lo:hi";
//         "*" as the upper bound stands for LONG_MAX
// msgnum  the message number to look for
// Returns 1 as soon as a matching range is found.
287 int is_msg_in_sequence_set(const char *mset, long msgnum) {
// Each entry and each bound is limited to what fits in these 128-byte buffers.
290 char setstr[128], lostr[128], histr[128];
293 num_sets = num_tokens(mset, ',');
294 for (s=0; s<num_sets; ++s) {
295 extract_token(setstr, mset, s, ',', sizeof setstr);
// The first ':'-separated field is the lower bound.
297 extract_token(lostr, setstr, 0, ':', sizeof lostr);
// A second field, when present, is the upper bound.
298 if (num_tokens(setstr, ':') >= 2) {
299 extract_token(histr, setstr, 1, ':', sizeof histr);
// "*" means open-ended: substitute the largest representable long.
300 if (!strcmp(histr, "*")) {
301 snprintf(histr, sizeof histr, "%ld", LONG_MAX);
// Upper bound is set equal to the lower bound (presumably the single-number case -- confirm).
305 strcpy(histr, lostr);
// Inclusive range test.
310 if ((msgnum >= lo) && (msgnum <= hi)) return(1);
316 // Utility function to "readline" from memory
317 // start   Location in memory from which we are reading.
318 // buf     The buffer to place the string in.
319 // maxlen  Size of string buffer
320 // Returns a pointer to the source memory right after we stopped reading.
321 char *memreadline(char *start, char *buf, int maxlen) {
324 int len = 0; // tally our own length to avoid strlen() delays
// Store the character only if one byte of buf remains free afterwards (len + 1 < maxlen);
// CR (13) and LF (10) are never stored.
330 if ((len + 1 < (maxlen)) && (ch != 13) && (ch != 10)) {
// LF or NUL ends the line.
333 if ((ch == 10) || (ch == 0)) {
341 // Utility function to "readline" from memory
342 // start   Location in memory from which we are reading.
343 // buf     The buffer to place the string in.
344 // maxlen  Size of string buffer
345 // retlen  The length of the returned string
346 // Returns a pointer to the source memory right after we stopped reading.
347 char *memreadlinelen(char *start, char *buf, int maxlen, int *retlen) {
350 int len = 0; // tally our own length to avoid strlen() delays
// Store the character only if one byte of buf remains free afterwards (len + 1 < maxlen);
// CR (13) and LF (10) are never stored.
356 if ((len + 1 < (maxlen)) && (ch != 13) && (ch != 10)) {
// LF or NUL ends the line.
359 if ((ch == 10) || (ch == 0)) {
368 // Utility function to "readline" from memory (variant taking and returning const pointers)
369 // start   Location in memory from which we are reading.
370 // buf     The buffer to place the string in.
371 // maxlen  Size of string buffer
372 // Returns a pointer to the source memory right after we stopped reading.
373 const char *cmemreadline(const char *start, char *buf, int maxlen) {
376 int len = 0; // tally our own length to avoid strlen() delays
// Store the character only if one byte of buf remains free afterwards (len + 1 < maxlen);
// CR (13) and LF (10) are never stored.
382 if ((len + 1 < (maxlen)) && (ch != 13) && (ch != 10)) {
// LF or NUL ends the line.
385 if ((ch == 10) || (ch == 0)) {
393 // Utility function to "readline" from memory (variant taking and returning const pointers)
394 // start   Location in memory from which we are reading.
395 // buf     The buffer to place the string in.
396 // maxlen  Size of string buffer
397 // retlen  The length of the returned string
398 // Returns a pointer to the source memory right after we stopped reading.
399 const char *cmemreadlinelen(const char *start, char *buf, int maxlen, int *retlen) {
402 int len = 0; // tally our own length to avoid strlen() delays
// Store the character only if one byte of buf remains free afterwards (len + 1 < maxlen);
// CR (13) and LF (10) are never stored.
408 if ((len + 1 < (maxlen)) && (ch != 13) && (ch != 10)) {
// LF or NUL ends the line.
411 if ((ch == 10) || (ch == 0)) {
420 // Strip a boundarized substring out of a string (for example, remove parentheses and anything inside them).
// str            the string to edit in place
// leftboundary   character that opens the region to remove
// rightboundary  character that closes the region to remove
421 int stripout(char *str, char leftboundary, char rightboundary) {
429 for (int a = 0; str[a]; ++a) {
// Only the FIRST left boundary is taken (lb is still -1 at that point).
430 if ((lb==-1) && (str[a] == leftboundary)) {
// Right boundaries are considered at every occurrence.
432 } else if (str[a] == rightboundary) {
// Nothing to strip: no left boundary found, or the right boundary does not come after it.
437 if ((lb==-1) || (rb <= lb)) {
// Close the gap by copying the tail over the stripped region. Source and destination
// overlap; this depends on the strcpy() defined in this file, which uses memmove().
441 strcpy(str + lb, str + rb + 1);
445 // Reduce a string down to a boundarized substring (for example, remove
446 // parentheses and anything outside them).
// str            the string to edit in place
// leftboundary   character that opens the region to keep
// rightboundary  character that closes the region to keep
447 long stripallbut(char *str, char leftboundary, char rightboundary) {
// orig_len doubles as the scan index; it ends up as the length of the original string.
456 while (str[orig_len]) {
// Only the FIRST left boundary is taken (lb is still -1 at that point).
457 if ((lb==-1) && (str[orig_len] == leftboundary)) {
459 } else if (str[orig_len] == rightboundary) {
// Nothing to extract: no left boundary found, or the right boundary does not come after it.
465 if ((lb==-1) || (rb <= lb)) {
// Number of characters strictly between the two boundary characters.
471 long new_len = rb - lb - 1;
// Slide the kept region to the front of the buffer; memmove() because the regions overlap.
472 memmove(str, str + lb + 1, new_len);
// fgets() wrapper: reads a line into s (at most size - 1 characters) from stream, then
// looks for the newline in the result (presumably to strip it -- confirm).
477 char *myfgets(char *s, int size, FILE *stream) {
478 char *ret = fgets(s, size, stream);
// Locate the first newline in what was read.
482 nl = strchr(s, '\n');
492 // Escape a string for feeding out as a URL.
493 // outbuf  the output buffer
494 // oblen   the size of outbuf
495 // strbuf  the input buffer (NUL-terminated; its length is taken with strlen)
496 void urlesc(char *outbuf, size_t oblen, char *strbuf) {
497 int a, b, c, len, eclen, olen;
// Characters that are compared against each input character for escaping.
498 char *ec = " +#&;`'|*?-~<>^()[]{}/$\"\\";
501 len = strlen(strbuf);
// For every input character, check whether it appears in ec.
504 for (a = 0; a < len; ++a) {
506 for (b = 0; b < eclen; ++b) {
507 if (strbuf[a] == ec[b])
// Escaped form: '%' followed by two lowercase hex digits, bounded by the space left in outbuf.
// NOTE(review): strbuf[a] is a plain char; if a byte >= 0x80 ever reaches this line on a
// signed-char platform it is sign-extended and prints more than two digits.
511 snprintf(&outbuf[olen], oblen - olen, "%%%02x", strbuf[a]);
// Literal copy of an unescaped character.
// NOTE(review): no bound check against oblen is visible on this line -- confirm one exists nearby.
515 outbuf[olen ++] = strbuf[a];
521 // In our world, we want strcpy() to be able to work with overlapping strings.
// NOTE(review): this defines a function with the name of a standard library function.
// ISO C reserves that identifier, so whether this definition or the libc/builtin one is
// actually used depends on the toolchain -- confirm the build guarantees it.
525 char *strcpy(char *dest, const char *src) {
// Copy the string including its NUL terminator; memmove() tolerates overlap.
526 memmove(dest, src, (strlen(src) + 1) );
531 // Generate a new, globally unique UID parameter for a calendar etc. object
// buf  receives the UUID text; the kernel path reads 36 bytes into it, so it must be
//      larger than that (presumably at least 37 bytes for the terminator -- confirm)
532 void generate_uuid(char *buf) {
// NOTE(review): both statics are shared across calls and no locking is visible here.
533 static int seq = (-1);
534 static int no_kernel_uuid = 0;
536 // If we are running on Linux then we have a kernelspace uuid generator available
538 if (no_kernel_uuid == 0) {
540 fp = fopen("/proc/sys/kernel/random/uuid", "rb");
// One item of 36 bytes: the textual UUID without a terminator.
543 rv = fread(buf, 36, 1, fp);
552 // If the kernel didn't provide us with a uuid, we generate a pseudo-random one
// Keep the sequence counter within 12 bits so it fits a "%03x" field.
560 seq = (seq % 0x0FFF) ;
// Fallback layout 8-4-4-4-12 hex digits with a literal '4' and 'a' leading the third and fourth groups.
// NOTE(review): sprintf() is unbounded; the size of buf is not passed to this function.
562 sprintf(buf, "%08lx-%04lx-4%03x-a%03x-%012lx",
572 // bmstrcasestr() -- case-insensitive substring search
574 // This uses the Boyer-Moore search algorithm and is therefore quite fast.
575 // The code is roughly based on the strstr() replacement from 'tin' written
// (the remainder of the attribution above is not visible in this excerpt)
//
// text / textlen     the text to search and its length
// pattern / patlen   the pattern to look for and its length
// Returns a pointer into text at the start of the match; NULL when text or pattern is NULL.
577 inline static char *_bmstrcasestr_len(char *text, size_t textlen, const char *pattern, size_t patlen) {
579 register unsigned char *p, *t;
580 register int i, j, *delta;
584 if (!text) return(NULL);
585 if (!pattern) return(NULL);
587 // algorithm fails if pattern is empty
588 if ((p1 = patlen) == 0)
591 // code below fails (whenever i is unsigned) if pattern too long
// delta is a skip table with one entry per possible byte value (0..255).
597 for (i = 0; i <= 255; i++)
// Record a skip distance for each pattern character, keyed by its lowercase form.
599 for (p = (unsigned char *) pattern, i = p1; --i > 0;)
600 delta[tolower(*p++)] = i;
602 // From now on, we want patlen - 1.
603 // In the loop below, p points to the end of the pattern,
604 // t points to the end of the text to be tested against the
605 // pattern, and i counts the amount of text remaining, not
606 // including the part to be tested.
608 p = (unsigned char *) pattern + p1;
609 t = (unsigned char *) text + p1;
610 i = textlen - patlen;
// Cheap test on the last character first; only then compare the rest case-insensitively.
612 if (tolower(p[0]) == tolower(t[0])) {
613 if (strncasecmp ((const char *)(p - p1), (const char *)(t - p1), p1) == 0) {
// Match: t points at its last character, so step back p1 to its first.
614 return ((char *)t - p1);
// Mismatch: look up how far the window may be advanced for the current text character.
617 j = delta[tolower(t[0])];
628 * bmstrcasestr() -- case-insensitive substring search
630 * This uses the Boyer-Moore search algorithm and is therefore quite fast.
631 * The code is roughly based on the strstr() replacement from 'tin' written
// Convenience wrapper for NUL-terminated strings: measures both arguments with strlen()
// and delegates to _bmstrcasestr_len(). Returns NULL when either argument is NULL.
634 char *bmstrcasestr(char *text, const char *pattern) {
638 if (!text) return(NULL);
639 if (!pattern) return(NULL);
641 textlen = strlen (text);
642 patlen = strlen (pattern);
644 return _bmstrcasestr_len(text, textlen, pattern, patlen);
// Public entry point for callers that already know both lengths; forwards its arguments
// unchanged to the static helper _bmstrcasestr_len().
647 char *bmstrcasestr_len(char *text, size_t textlen, const char *pattern, size_t patlen) {
648 return _bmstrcasestr_len(text, textlen, pattern, patlen);
653 * _cbmstrcasestr_len() -- case-insensitive substring search (const-qualified variant)
655 * This uses the Boyer-Moore search algorithm and is therefore quite fast.
656 * The code is roughly based on the strstr() replacement from 'tin' written
// Const-qualified counterpart of _bmstrcasestr_len(): takes a const text and returns a
// const pointer to the start of the match; NULL when text or pattern is NULL.
659 inline static const char *_cbmstrcasestr_len(const char *text, size_t textlen, const char *pattern, size_t patlen) {
661 register unsigned char *p, *t;
662 register int i, j, *delta;
666 if (!text) return(NULL);
667 if (!pattern) return(NULL);
669 /* algorithm fails if pattern is empty */
670 if ((p1 = patlen) == 0)
673 /* code below fails (whenever i is unsigned) if pattern too long */
// delta is a skip table with one entry per possible byte value (0..255).
679 for (i = 0; i <= 255; i++)
// Record a skip distance for each pattern character, keyed by its lowercase form.
681 for (p = (unsigned char *) pattern, i = p1; --i > 0;)
682 delta[tolower(*p++)] = i;
685 * From now on, we want patlen - 1.
686 * In the loop below, p points to the end of the pattern,
687 * t points to the end of the text to be tested against the
688 * pattern, and i counts the amount of text remaining, not
689 * including the part to be tested.
692 p = (unsigned char *) pattern + p1;
// The cast drops the const qualifier of text; t is only read through in the lines visible here.
693 t = (unsigned char *) text + p1;
694 i = textlen - patlen;
// Cheap test on the last character first; only then compare the rest case-insensitively.
696 if (tolower(p[0]) == tolower(t[0])) {
697 if (strncasecmp ((const char *)(p - p1), (const char *)(t - p1), p1) == 0) {
// Match: t points at its last character, so step back p1 to its first.
698 return ((char *)t - p1);
// Mismatch: look up how far the window may be advanced for the current text character.
701 j = delta[tolower(t[0])];
712 * cbmstrcasestr() -- case-insensitive substring search (const-qualified variant)
714 * This uses the Boyer-Moore search algorithm and is therefore quite fast.
715 * The code is roughly based on the strstr() replacement from 'tin' written
// Convenience wrapper for NUL-terminated strings: measures both arguments with strlen()
// and delegates to _cbmstrcasestr_len(). Returns NULL when either argument is NULL.
718 const char *cbmstrcasestr(const char *text, const char *pattern) {
722 if (!text) return(NULL);
723 if (!pattern) return(NULL);
725 textlen = strlen (text);
726 patlen = strlen (pattern);
728 return _cbmstrcasestr_len(text, textlen, pattern, patlen);
// Public entry point for callers that already know both lengths; forwards its arguments
// unchanged to the static helper _cbmstrcasestr_len().
732 const char *cbmstrcasestr_len(const char *text, size_t textlen, const char *pattern, size_t patlen) {
733 return _cbmstrcasestr_len(text, textlen, pattern, patlen);
738 * Local replacement for controversial C library function that generates
739 * names for temporary files. Included to shut up compiler warnings.
// name  buffer that receives the generated path
// len   size of name; passed to snprintf() as the output limit
// Candidate names have the form /tmp/ctdl.<hex>.<hex>; each one is tested with
// access(name, F_OK), which returns 0 when the path already exists.
// NOTE(review): an existence check followed by later use of the name leaves a window in
// which another process can create the file -- confirm callers open it safely.
// The loop condition bounds the number of attempts (i is incremented before each test against 100).
741 void CtdlMakeTempFileName(char *name, int len) {
744 while (i++, i < 100) {
745 snprintf(name, len, "/tmp/ctdl.%04lx.%04x",
749 if (!access(name, F_OK)) {
757 * Determine whether the specified message number is contained within the specified set.
758 * Returns nonzero if the specified message number is in the specified message set string.
// mset    comma-separated list of entries, each either a single number or "lo:hi";
//         "*" as the upper bound stands for LONG_MAX
// msgnum  the message number to look for
// Returns 1 as soon as a matching range is found.
760 int is_msg_in_mset(const char *mset, long msgnum) {
763 char setstr[SIZ], lostr[SIZ], histr[SIZ];
766 // Examine each comma-separated entry of the set in turn.
767 num_sets = num_tokens(mset, ',');
768 for (s=0; s<num_sets; ++s) {
769 extract_token(setstr, mset, s, ',', sizeof setstr);
// The first ':'-separated field is the lower bound.
771 extract_token(lostr, setstr, 0, ':', sizeof lostr);
// A second field, when present, is the upper bound.
772 if (num_tokens(setstr, ':') >= 2) {
773 extract_token(histr, setstr, 1, ':', sizeof histr);
// "*" means open-ended: substitute the largest representable long.
774 if (!strcmp(histr, "*")) {
775 snprintf(histr, sizeof histr, "%ld", LONG_MAX);
// Upper bound is set equal to the lower bound (presumably the single-number case -- confirm).
779 strcpy(histr, lostr);
// Inclusive range test.
784 if ((msgnum >= lo) && (msgnum <= hi)) return(1);
791 // Searches for a pattern within a search string (case-insensitive comparison).
792 // Returns the position in the string.
// search  the string to search in
// patn    the pattern to look for
793 int pattern2(char *search, char *patn) {
796 len = strlen (search);
797 plen = strlen (patn);
// Try every start offset; strncasecmp() stops at the NUL of search, so a window that
// runs past the end of the string simply fails to match.
798 for (a = 0; a < len; ++a) {
799 if (!strncasecmp(&search[a], patn, plen))
807 * Convert all whitespace characters in a supplied string to underscores
// str  the string to process; non-const, modified in place
809 void convert_spaces_to_underscores(char *str) {
816 for (i=0; i<len; ++i) {
// NOTE(review): str[i] is a plain char; isspace() requires a value representable as
// unsigned char (or EOF), so bytes >= 0x80 are a problem where char is signed.
817 if (isspace(str[i])) {
825 * check whether the provided string needs to be qp encoded or not
827 int CheckEncode(const char *pch, long len, const char *pche) {
831 if (((unsigned char) *pch < 32) ||
832 ((unsigned char) *pch > 126)) {