1 // A basic toolset containing miscellaneous functions for string manipulation,
2 // encoding/decoding, and a bunch of other stuff.
4 // Copyright (c) 1987-2022 by the citadel.org team
6 // This program is open source software. Use, duplication, or disclosure
7 // is subject to the terms of the GNU General Public License, version 3.
13 #include <sys/types.h>
20 #if TIME_WITH_SYS_TIME
21 # include <sys/time.h>
25 # include <sys/time.h>
31 #include "libcitadel.h"
37 typedef unsigned char byte; /* Byte type */
39 // copy a string into a buffer of a known size. abort if we exceed the limits
41 // dest the target buffer
42 // src the source string
45 // returns the number of characters copied if dest is big enough, -n if not.
46 int safestrncpy(char *dest, const char *src, size_t n) {
49 if (dest == NULL || src == NULL)
51 fprintf(stderr, "safestrncpy: NULL argument\n");
57 if (dest[i] == 0) return i;
65 // num_tokens() - discover number of parameters/tokens in a string
66 int num_tokens(const char *source, char tok) {
68 const char *ptr = source;
74 while (*ptr != '\0') {
84 // extract_token() - a string tokenizer
85 // returns -1 if not found, or length of token.
86 long extract_token(char *dest, const char *source, int parmnum, char separator, int maxlen) {
87 const char *s; // source
88 int len = 0; // running total length of extracted string
89 int current_token = 0; // token currently being processed
106 if (*s == separator) {
109 if ( (current_token == parmnum) && (*s != separator) && (len < maxlen) ) {
113 else if ((current_token > parmnum) || (len >= maxlen)) {
120 if (current_token < parmnum) {
127 // remove_token() - a tokenizer that kills, maims, and destroys
128 void remove_token(char *source, int parmnum, char separator) {
129 char *d, *s; // dest, source
132 /* Find desired parameter */
134 while (count < parmnum) {
135 // End of string, bail!
140 if (*d == separator) {
145 if (!d) return; // Parameter not found
147 // Find next parameter
149 while (*s && *s != separator) {
156 else if (d == source)
163 // extract_int() - extract an int parm without supplying a buffer
164 int extract_int(const char *source, int parmnum) {
167 if (extract_token(buf, source, parmnum, '|', sizeof buf) > 0)
174 // extract_long() - extract a long parm without supplying a buffer
175 long extract_long(const char *source, int parmnum) {
178 if (extract_token(buf, source, parmnum, '|', sizeof buf) > 0)
185 // extract_unsigned_long() - extract an unsigned long parm
186 unsigned long extract_unsigned_long(const char *source, int parmnum) {
189 if (extract_token(buf, source, parmnum, '|', sizeof buf) > 0)
190 return strtoul(buf, NULL, 10);
196 // If we send out non-ASCII subjects, we encode them this way.
197 char *rfc2047encode(const char *line, long length) {
198 const char *AlreadyEncoded;
201 #define UTF8_HEADER "=?UTF-8?B?"
203 /* check if we're already done */
204 AlreadyEncoded = strstr(line, "=?");
205 if ((AlreadyEncoded != NULL) && ((strstr(AlreadyEncoded, "?B?") != NULL)|| (strstr(AlreadyEncoded, "?Q?") != NULL))) {
209 result = (char*) malloc(sizeof(UTF8_HEADER) + 4 + length * 2);
210 strncpy (result, UTF8_HEADER, strlen (UTF8_HEADER));
211 CtdlEncodeBase64(result + strlen(UTF8_HEADER), line, length, BASE64_NO_LINEBREAKS);
212 end = strlen (result);
219 // removes double slashes from pathnames
220 // allows / disallows trailing slashes
221 void StripSlashes(char *Dir, int TrailingSlash) {
226 while (!IsEmptyStr(a)) {
238 if ((TrailingSlash) && (*(b - 1) != '/')){
247 // Strip leading and trailing spaces from a string
248 size_t string_trim(char *buf) {
249 char *first_nonspace = NULL;
250 char *last_nonspace = NULL;
254 if ((buf == NULL) || (*buf == '\0')) {
258 for (ptr=buf; *ptr!=0; ++ptr) {
259 if (!isspace(*ptr)) {
260 if (!first_nonspace) {
261 first_nonspace = ptr;
267 if ((!first_nonspace) || (!last_nonspace)) {
272 new_len = last_nonspace - first_nonspace + 1;
273 memmove(buf, first_nonspace, new_len);
280 * check for the presence of a character within a string (returns count)
281 * st the string to examine
282 * ch the char to search
283 * returns the number of times ch appears in st
285 int haschar(const char *st, int ch) {
290 while (!IsEmptyStr(ptr))
301 * Determine whether the specified message number is contained within the
302 * specified sequence set.
304 int is_msg_in_sequence_set(const char *mset, long msgnum) {
307 char setstr[128], lostr[128], histr[128];
310 num_sets = num_tokens(mset, ',');
311 for (s=0; s<num_sets; ++s) {
312 extract_token(setstr, mset, s, ',', sizeof setstr);
314 extract_token(lostr, setstr, 0, ':', sizeof lostr);
315 if (num_tokens(setstr, ':') >= 2)
317 extract_token(histr, setstr, 1, ':', sizeof histr);
318 if (!strcmp(histr, "*"))
320 snprintf(histr, sizeof histr, "%ld", LONG_MAX);
325 strcpy(histr, lostr);
330 if ((msgnum >= lo) && (msgnum <= hi)) return(1);
337 * Utility function to "readline" from memory
338 * start Location in memory from which we are reading.
339 * buf the buffer to place the string in.
340 * maxlen Size of string buffer
341 * returns pointer to the source memory right after we stopped reading.
343 char *memreadline(char *start, char *buf, int maxlen) {
346 int len = 0; /* tally our own length to avoid strlen() delays */
352 if ((len + 1 < (maxlen)) && (ch != 13) && (ch != 10)) {
355 if ((ch == 10) || (ch == 0)) {
364 * Utility function to "readline" from memory
365 * start Location in memory from which we are reading.
366 * buf the buffer to place the string in.
367 * maxlen Size of string buffer
368 * retlen the length of the returned string
369 * returns a pointer to the source memory right after we stopped reading.
371 char *memreadlinelen(char *start, char *buf, int maxlen, int *retlen) {
374 int len = 0; /* tally our own length to avoid strlen() delays */
381 if ((len + 1 < (maxlen)) && (ch != 13) && (ch != 10))
385 if ((ch == 10) || (ch == 0))
396 * \brief Utility function to "readline" from memory
397 * \param start Location in memory from which we are reading.
398 * \param buf the buffer to place the string in.
399 * \param maxlen Size of string buffer
400 * \return Pointer to the source memory right after we stopped reading.
402 const char *cmemreadline(const char *start, char *buf, int maxlen)
406 int len = 0; /**< tally our own length to avoid strlen() delays */
412 if ((len + 1 < (maxlen)) && (ch != 13) && (ch != 10)) {
415 if ((ch == 10) || (ch == 0)) {
424 * \brief Utility function to "readline" from memory
425 * \param start Location in memory from which we are reading.
426 * \param buf the buffer to place the string in.
427 * \param maxlen Size of string buffer
428 * \param retlen the length of the returned string
429 * \return Pointer to the source memory right after we stopped reading.
431 const char *cmemreadlinelen(const char *start, char *buf, int maxlen, int *retlen)
435 int len = 0; /**< tally our own length to avoid strlen() delays */
441 if ((len + 1 < (maxlen)) && (ch != 13) && (ch != 10)) {
444 if ((ch == 10) || (ch == 0)) {
454 * Strip a boundarized substring out of a string (for example, remove
455 * parentheses and anything inside them).
457 int stripout(char *str, char leftboundary, char rightboundary) {
462 for (a = 0; a < strlen(str); ++a) {
463 if (str[a] == leftboundary) lb = a;
464 if (str[a] == rightboundary) rb = a;
467 if ( (lb > 0) && (rb > lb) ) {
468 strcpy(&str[lb - 1], &str[rb + 1]);
472 else if ( (lb == 0) && (rb > lb) ) {
473 strcpy(str, &str[rb + 1]);
481 * Reduce a string down to a boundarized substring (for example, remove
482 * parentheses and anything outside them).
484 long stripallbut(char *str, char leftboundary, char rightboundary) {
490 lb = strrchr(str, leftboundary);
493 rb = strchr(str, rightboundary);
494 if ((rb != NULL) && (rb >= lb)) {
497 len = (long)rb - (long)lb;
498 memmove(str, lb, len);
504 return (long)strlen(str);
508 char *myfgets(char *s, int size, FILE *stream) {
509 char *ret = fgets(s, size, stream);
513 nl = strchr(s, '\n');
523 * \brief Escape a string for feeding out as a URL.
524 * \param outbuf the output buffer
525 * \param oblen the size of outbuf to sanitize
526 * \param strbuf the input buffer
528 void urlesc(char *outbuf, size_t oblen, char *strbuf)
530 int a, b, c, len, eclen, olen;
531 char *ec = " +#&;`'|*?-~<>^()[]{}/$\"\\";
534 len = strlen(strbuf);
537 for (a = 0; a < len; ++a) {
539 for (b = 0; b < eclen; ++b) {
540 if (strbuf[a] == ec[b])
544 snprintf(&outbuf[olen], oblen - olen, "%%%02x", strbuf[a]);
548 outbuf[olen ++] = strbuf[a];
556 * In our world, we want strcpy() to be able to work with overlapping strings.
561 char *strcpy(char *dest, const char *src) {
562 memmove(dest, src, (strlen(src) + 1) );
568 * Generate a new, globally unique UID parameter for a calendar etc. object
570 void generate_uuid(char *buf) {
571 static int seq = (-1);
572 static int no_kernel_uuid = 0;
574 /* If we are running on Linux then we have a kernelspace uuid generator available */
576 if (no_kernel_uuid == 0) {
578 fp = fopen("/proc/sys/kernel/random/uuid", "rb");
581 rv = fread(buf, 36, 1, fp);
590 /* If the kernel didn't provide us with a uuid, we generate a pseudo-random one */
598 seq = (seq % 0x0FFF) ;
600 sprintf(buf, "%08lx-%04lx-4%03x-a%03x-%012lx",
610 * bmstrcasestr() -- case-insensitive substring search
612 * This uses the Boyer-Moore search algorithm and is therefore quite fast.
613 * The code is roughly based on the strstr() replacement from 'tin' written
616 inline static char *_bmstrcasestr_len(char *text, size_t textlen, const char *pattern, size_t patlen) {
618 register unsigned char *p, *t;
619 register int i, j, *delta;
623 if (!text) return(NULL);
624 if (!pattern) return(NULL);
626 /* algorithm fails if pattern is empty */
627 if ((p1 = patlen) == 0)
630 /* code below fails (whenever i is unsigned) if pattern too long */
636 for (i = 0; i <= 255; i++)
638 for (p = (unsigned char *) pattern, i = p1; --i > 0;)
639 delta[tolower(*p++)] = i;
642 * From now on, we want patlen - 1.
643 * In the loop below, p points to the end of the pattern,
644 * t points to the end of the text to be tested against the
645 * pattern, and i counts the amount of text remaining, not
646 * including the part to be tested.
649 p = (unsigned char *) pattern + p1;
650 t = (unsigned char *) text + p1;
651 i = textlen - patlen;
653 if (tolower(p[0]) == tolower(t[0])) {
654 if (strncasecmp ((const char *)(p - p1), (const char *)(t - p1), p1) == 0) {
655 return ((char *)t - p1);
658 j = delta[tolower(t[0])];
669 * bmstrcasestr() -- case-insensitive substring search
671 * This uses the Boyer-Moore search algorithm and is therefore quite fast.
672 * The code is roughly based on the strstr() replacement from 'tin' written
675 char *bmstrcasestr(char *text, const char *pattern) {
679 if (!text) return(NULL);
680 if (!pattern) return(NULL);
682 textlen = strlen (text);
683 patlen = strlen (pattern);
685 return _bmstrcasestr_len(text, textlen, pattern, patlen);
688 char *bmstrcasestr_len(char *text, size_t textlen, const char *pattern, size_t patlen) {
689 return _bmstrcasestr_len(text, textlen, pattern, patlen);
694 * cbmstrcasestr() -- case-insensitive substring search (const-qualified variant)
696 * This uses the Boyer-Moore search algorithm and is therefore quite fast.
697 * The code is roughly based on the strstr() replacement from 'tin' written
700 inline static const char *_cbmstrcasestr_len(const char *text, size_t textlen, const char *pattern, size_t patlen) {
702 register unsigned char *p, *t;
703 register int i, j, *delta;
707 if (!text) return(NULL);
708 if (!pattern) return(NULL);
710 /* algorithm fails if pattern is empty */
711 if ((p1 = patlen) == 0)
714 /* code below fails (whenever i is unsigned) if pattern too long */
720 for (i = 0; i <= 255; i++)
722 for (p = (unsigned char *) pattern, i = p1; --i > 0;)
723 delta[tolower(*p++)] = i;
726 * From now on, we want patlen - 1.
727 * In the loop below, p points to the end of the pattern,
728 * t points to the end of the text to be tested against the
729 * pattern, and i counts the amount of text remaining, not
730 * including the part to be tested.
733 p = (unsigned char *) pattern + p1;
734 t = (unsigned char *) text + p1;
735 i = textlen - patlen;
737 if (tolower(p[0]) == tolower(t[0])) {
738 if (strncasecmp ((const char *)(p - p1), (const char *)(t - p1), p1) == 0) {
739 return ((char *)t - p1);
742 j = delta[tolower(t[0])];
753 * cbmstrcasestr() -- case-insensitive substring search (const-qualified variant)
755 * This uses the Boyer-Moore search algorithm and is therefore quite fast.
756 * The code is roughly based on the strstr() replacement from 'tin' written
759 const char *cbmstrcasestr(const char *text, const char *pattern) {
763 if (!text) return(NULL);
764 if (!pattern) return(NULL);
766 textlen = strlen (text);
767 patlen = strlen (pattern);
769 return _cbmstrcasestr_len(text, textlen, pattern, patlen);
773 const char *cbmstrcasestr_len(const char *text, size_t textlen, const char *pattern, size_t patlen) {
774 return _cbmstrcasestr_len(text, textlen, pattern, patlen);
779 * Local replacement for controversial C library function that generates
780 * names for temporary files. Included to shut up compiler warnings.
782 void CtdlMakeTempFileName(char *name, int len) {
785 while (i++, i < 100) {
786 snprintf(name, len, "/tmp/ctdl.%04lx.%04x",
790 if (!access(name, F_OK)) {
798 * Determine whether the specified message number is contained within the specified set.
799 * Returns nonzero if the specified message number is in the specified message set string.
801 int is_msg_in_mset(const char *mset, long msgnum) {
804 char setstr[SIZ], lostr[SIZ], histr[SIZ];
807 // Check each comma-separated range in the set for the message number.
808 num_sets = num_tokens(mset, ',');
809 for (s=0; s<num_sets; ++s) {
810 extract_token(setstr, mset, s, ',', sizeof setstr);
812 extract_token(lostr, setstr, 0, ':', sizeof lostr);
813 if (num_tokens(setstr, ':') >= 2) {
814 extract_token(histr, setstr, 1, ':', sizeof histr);
815 if (!strcmp(histr, "*")) {
816 snprintf(histr, sizeof histr, "%ld", LONG_MAX);
820 strcpy(histr, lostr);
825 if ((msgnum >= lo) && (msgnum <= hi)) return(1);
832 // searches for a pattern within a search string
833 // returns position in string
834 int pattern2(char *search, char *patn) {
837 len = strlen (search);
838 plen = strlen (patn);
839 for (a = 0; a < len; ++a) {
840 if (!strncasecmp(&search[a], patn, plen))
848 * Strip leading and trailing spaces from a string; with premeasured and adjusted length.
849 * buf - the string to modify
850 * len - length of the string.
852 void string_trimlen(char *buf, int *len) {
854 if (*len == 0) return;
855 while ((*len > delta) && (isspace(buf[delta]))){
858 memmove (buf, &buf[delta], *len - delta + 1);
861 if (*len == 0) return;
862 while (isspace(buf[(*len) - 1])){
863 buf[--(*len)] = '\0';
869 * Convert all whitespace characters in a supplied string to underscores
871 void convert_spaces_to_underscores(char *str) {
878 for (i=0; i<len; ++i) {
879 if (isspace(str[i])) {
887 * check whether the provided string needs to be qp encoded or not
889 int CheckEncode(const char *pch, long len, const char *pche) {
893 if (((unsigned char) *pch < 32) ||
894 ((unsigned char) *pch > 126)) {