2 * A basic toolset containing miscellaneous functions for string manipulation,
3 * encoding/decoding, and a bunch of other stuff.
11 #include <sys/types.h>
18 #if TIME_WITH_SYS_TIME
19 # include <sys/time.h>
23 # include <sys/time.h>
29 #include "libcitadel.h"
35 typedef unsigned char byte; /* Byte type */
37 /* Base64 encoding table */
38 const byte etable[256] = {
39 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81,
40 82, 83, 84, 85, 86, 87, 88, 89, 90, 97, 98, 99, 100, 101, 102, 103,
41 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117,
42 118, 119, 120, 121, 122, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 43,
43 47, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
44 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
45 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
46 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
47 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
48 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
49 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
50 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
51 0, 0, 0, 0, 0, 0, 0, 0, 0
54 /* Base64 decoding table */
55 const byte dtable[256] = {
56 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
57 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
58 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
59 128, 62, 128, 128, 128, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
60 128, 128, 128, 0, 128, 128, 128, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
61 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 128, 128, 128,
62 128, 128, 128, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
63 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 128, 128, 128, 128,
64 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
65 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
66 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
67 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
68 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
69 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
70 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
71 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
72 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
77 * copy a string into a buffer of a known size. abort if we exceed the limits
79 * dest the target buffer
80 * src the source string
83 * returns the number of characters copied if dest is big enough, -n if not.
85 int safestrncpy(char *dest, const char *src, size_t n)
89 if (dest == NULL || src == NULL) {
90 fprintf(stderr, "safestrncpy: NULL argument\n");
96 if (dest[i] == 0) return i;
106 * num_tokens() - discover number of parameters/tokens in a string
108 int num_tokens(const char *source, char tok)
111 const char *ptr = source;
113 if (source == NULL) {
117 while (*ptr != '\0') {
126 //extern void cit_backtrace(void);
130 * extract_token() - a string tokenizer
131 * returns -1 if not found, or length of token.
133 long extract_token(char *dest, const char *source, int parmnum, char separator, int maxlen)
135 const char *s; //* source * /
136 int len = 0; //* running total length of extracted string * /
137 int current_token = 0; //* token currently being processed * /
146 //lprintf (CTDL_DEBUG, "test >: n: %d sep: %c source: %s \n willi \n", parmnum, separator, source);
156 if (*s == separator) {
159 if ( (current_token == parmnum) &&
165 else if ((current_token > parmnum) || (len >= maxlen)) {
172 if (current_token < parmnum) {
173 //lprintf (CTDL_DEBUG,"test <!: %s\n", dest);
176 //lprintf (CTDL_DEBUG,"test <: %d; %s\n", len, dest);
183 * extract_token() - a string tokenizer
185 long extract_token(char *dest, const char *source, int parmnum, char separator, int maxlen)
188 const char *s; // source
194 //lprintf (CTDL_DEBUG, "test >: n: %d sep: %c source: %s \n willi \n", parmnum, separator, source);
197 // Locate desired parameter
199 while (count < parmnum) {
200 // End of string, bail!
205 if (*s == separator) {
211 //lprintf (CTDL_DEBUG,"test <!: %s\n", dest);
212 return -1; // Parameter not found
215 for (d = dest; *s && *s != separator && ++len<maxlen; s++, d++) {
219 //lprintf (CTDL_DEBUG,"test <: %d; %s\n", len, dest);
226 * remove_token() - a tokenizer that kills, maims, and destroys
228 void remove_token(char *source, int parmnum, char separator)
230 char *d, *s; /* dest, source */
233 /* Find desired parameter */
235 while (count < parmnum) {
236 /* End of string, bail! */
241 if (*d == separator) {
246 if (!d) return; /* Parameter not found */
248 /* Find next parameter */
250 while (*s && *s != separator) {
257 else if (d == source)
271 * extract_int() - extract an int parm w/o supplying a buffer
273 int extract_int(const char *source, int parmnum)
277 if (extract_token(buf, source, parmnum, '|', sizeof buf) > 0)
284 * extract_long() - extract a long parm w/o supplying a buffer
286 long extract_long(const char *source, int parmnum)
290 if (extract_token(buf, source, parmnum, '|', sizeof buf) > 0)
298 * extract_unsigned_long() - extract an unsigned long parm
300 unsigned long extract_unsigned_long(const char *source, int parmnum)
304 if (extract_token(buf, source, parmnum, '|', sizeof buf) > 0)
305 return strtoul(buf, NULL, 10);
312 * CtdlDecodeBase64() and CtdlEncodeBase64() are adaptations of code by John Walker.
315 size_t CtdlEncodeBase64(char *dest, const char *source, size_t sourcelen, int linebreaks)
317 int i, hiteof = FALSE;
323 byte igroup[3], ogroup[4];
326 igroup[0] = igroup[1] = igroup[2] = 0;
327 for (n = 0; n < 3; n++) {
328 if (spos >= sourcelen) {
333 igroup[n] = (byte) c;
336 ogroup[0] = etable[igroup[0] >> 2];
338 etable[((igroup[0] & 3) << 4) |
341 etable[((igroup[1] & 0xF) << 2) |
343 ogroup[3] = etable[igroup[2] & 0x3F];
346 * Replace characters in output stream with "=" pad
347 * characters if fewer than three characters were
348 * read from the end of the input stream.
357 for (i = 0; i < 4; i++) {
358 dest[dpos++] = ogroup[i];
362 if ( (linebreaks) && (thisline > 70) ) {
370 if ( (linebreaks) && (thisline > 70) ) {
383 * Convert base64-encoded to binary. Returns the length of the decoded data.
384 * It will stop after reading 'length' bytes.
386 int CtdlDecodeBase64(char *dest, const char *source, size_t length)
393 byte a[4], b[4], o[3];
395 for (i = 0; i < 4; i++) {
396 if (spos >= length) {
407 if (dtable[c] & 0x80) {
408 /* Ignoring errors: discard invalid character. */
413 b[i] = (byte) dtable[c];
415 o[0] = (b[0] << 2) | (b[1] >> 4);
416 o[1] = (b[1] << 4) | (b[2] >> 2);
417 o[2] = (b[2] << 6) | b[3];
418 i = a[2] == '=' ? 1 : (a[3] == '=' ? 2 : 3);
419 if (i>=1) dest[dpos++] = o[0];
420 if (i>=2) dest[dpos++] = o[1];
421 if (i>=3) dest[dpos++] = o[2];
431 * If we send out non-ASCII subjects, we encode them this way.
433 char *rfc2047encode(char *line, long length)
435 char *AlreadyEncoded;
438 #define UTF8_HEADER "=?UTF-8?B?"
440 /* check if we're already done */
441 AlreadyEncoded = strstr(line, "=?");
442 if ((AlreadyEncoded != NULL) &&
443 ((strstr(AlreadyEncoded, "?B?") != NULL)||
444 (strstr(AlreadyEncoded, "?Q?") != NULL)))
449 result = (char*) malloc(sizeof(UTF8_HEADER) + 4 + length * 2);
450 strncpy (result, UTF8_HEADER, strlen (UTF8_HEADER));
451 CtdlEncodeBase64(result + strlen(UTF8_HEADER), line, length, 0);
452 end = strlen (result);
461 * Strip leading and trailing spaces from a string
463 void striplt(char *buf)
468 if (buf==NULL) return;
469 if (IsEmptyStr(buf)) return;
471 while ((!IsEmptyStr(buf)) && (isspace(buf[len - 1])))
473 if (IsEmptyStr(buf)) return;
475 while ((!IsEmptyStr(buf)) && (isspace(buf[a])))
478 memmove(buf, &buf[a], len - a + 1);
486 * \brief check for the presence of a character within a string (returns count)
487 * \param st the string to examine
488 * \param ch the char to search
489 * \return the number of times ch occurs in st
491 int haschar(const char *st,int ch)
497 while (!IsEmptyStr(ptr))
511 * Format a date/time stamp for output
512 * seconds is whether to print the seconds
514 void fmt_date(char *buf, size_t n, time_t thetime, int seconds) {
518 /* Month strings for date conversions ... this needs to be localized eventually */
519 char *fmt_date_months[12] = {
520 "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
524 localtime_r(&thetime, &tm);
527 if (hour == 0) hour = 12;
528 else if (hour > 12) hour = hour - 12;
531 snprintf(buf, n, "%s %d %4d %d:%02d:%02d%s",
532 fmt_date_months[tm.tm_mon],
538 ( (tm.tm_hour >= 12) ? "pm" : "am" )
541 snprintf(buf, n, "%s %d %4d %d:%02d%s",
542 fmt_date_months[tm.tm_mon],
547 ( (tm.tm_hour >= 12) ? "pm" : "am" )
555 * Determine whether the specified message number is contained within the
556 * specified sequence set.
558 int is_msg_in_sequence_set(const char *mset, long msgnum) {
561 char setstr[128], lostr[128], histr[128];
564 num_sets = num_tokens(mset, ',');
565 for (s=0; s<num_sets; ++s) {
566 extract_token(setstr, mset, s, ',', sizeof setstr);
568 extract_token(lostr, setstr, 0, ':', sizeof lostr);
569 if (num_tokens(setstr, ':') >= 2) {
570 extract_token(histr, setstr, 1, ':', sizeof histr);
571 if (!strcmp(histr, "*")) {
572 snprintf(histr, sizeof histr, "%ld", LONG_MAX);
576 strcpy(histr, lostr);
581 if ((msgnum >= lo) && (msgnum <= hi)) return(1);
588 * \brief Utility function to "readline" from memory
589 * \param start Location in memory from which we are reading.
590 * \param buf the buffer to place the string in.
591 * \param maxlen Size of string buffer
592 * \return Pointer to the source memory right after we stopped reading.
594 char *memreadline(char *start, char *buf, int maxlen)
598 int len = 0; /**< tally our own length to avoid strlen() delays */
604 if ((len + 1 < (maxlen)) && (ch != 13) && (ch != 10)) {
607 if ((ch == 10) || (ch == 0)) {
616 * \brief Utility function to "readline" from memory
617 * \param start Location in memory from which we are reading.
618 * \param buf the buffer to place the string in.
619 * \param maxlen Size of string buffer
620 * \param retlen the length of the returned string
621 * \return Pointer to the source memory right after we stopped reading.
623 char *memreadlinelen(char *start, char *buf, int maxlen, int *retlen)
627 int len = 0; /**< tally our own length to avoid strlen() delays */
633 if ((len + 1 < (maxlen)) && (ch != 13) && (ch != 10)) {
636 if ((ch == 10) || (ch == 0)) {
648 * Strip a boundarized substring out of a string (for example, remove
649 * parentheses and anything inside them).
651 int stripout(char *str, char leftboundary, char rightboundary) {
656 for (a = 0; a < strlen(str); ++a) {
657 if (str[a] == leftboundary) lb = a;
658 if (str[a] == rightboundary) rb = a;
661 if ( (lb > 0) && (rb > lb) ) {
662 strcpy(&str[lb - 1], &str[rb + 1]);
666 else if ( (lb == 0) && (rb > lb) ) {
667 strcpy(str, &str[rb + 1]);
675 * Reduce a string down to a boundarized substring (for example, remove
676 * parentheses and anything outside them).
678 void stripallbut(char *str, char leftboundary, char rightboundary) {
681 for (a = 0; a < strlen(str); ++ a) {
682 if (str[a] == leftboundary) strcpy(str, &str[a+1]);
685 for (a = 0; a < strlen(str); ++ a) {
686 if (str[a] == rightboundary) str[a] = 0;
691 char *myfgets(char *s, int size, FILE *stream) {
692 char *ret = fgets(s, size, stream);
696 nl = strchr(s, '\n');
706 * \brief Escape a string for feeding out as a URL.
707 * \param outbuf the output buffer
708 * \param oblen the size of outbuf to sanitize
709 * \param strbuf the input buffer
711 void urlesc(char *outbuf, size_t oblen, char *strbuf)
713 int a, b, c, len, eclen, olen;
714 char *ec = " +#&;`'|*?-~<>^()[]{}/$\"\\";
717 len = strlen(strbuf);
720 for (a = 0; a < len; ++a) {
722 for (b = 0; b < eclen; ++b) {
723 if (strbuf[a] == ec[b])
727 snprintf(&outbuf[olen], oblen - olen, "%%%02x", strbuf[a]);
731 outbuf[olen ++] = strbuf[a];
739 * In our world, we want strcpy() to be able to work with overlapping strings.
744 char *strcpy(char *dest, const char *src) {
745 memmove(dest, src, (strlen(src) + 1) );
751 * Generate a new, globally unique UID parameter for a calendar etc. object
753 void generate_uuid(char *buf) {
756 sprintf(buf, "%lx-%lx-%x",
764 * bmstrcasestr() -- case-insensitive substring search
766 * This uses the Boyer-Moore search algorithm and is therefore quite fast.
767 * The code is roughly based on the strstr() replacement from 'tin' written
770 char *bmstrcasestr(char *text, char *pattern) {
772 register unsigned char *p, *t;
773 register int i, j, *delta;
779 if (!text) return(NULL);
780 if (!pattern) return(NULL);
782 textlen = strlen (text);
783 patlen = strlen (pattern);
785 /* algorithm fails if pattern is empty */
786 if ((p1 = patlen) == 0)
789 /* code below fails (whenever i is unsigned) if pattern too long */
795 for (i = 0; i <= 255; i++)
797 for (p = (unsigned char *) pattern, i = p1; --i > 0;)
798 delta[tolower(*p++)] = i;
801 * From now on, we want patlen - 1.
802 * In the loop below, p points to the end of the pattern,
803 * t points to the end of the text to be tested against the
804 * pattern, and i counts the amount of text remaining, not
805 * including the part to be tested.
808 p = (unsigned char *) pattern + p1;
809 t = (unsigned char *) text + p1;
810 i = textlen - patlen;
812 if (tolower(p[0]) == tolower(t[0])) {
813 if (strncasecmp ((const char *)(p - p1), (const char *)(t - p1), p1) == 0) {
814 return ((char *)t - p1);
817 j = delta[tolower(t[0])];
829 * Local replacement for controversial C library function that generates
830 * names for temporary files. Included to shut up compiler warnings.
832 void CtdlMakeTempFileName(char *name, int len) {
835 while (i++, i < 100) {
836 snprintf(name, len, "/tmp/ctdl.%04lx.%04x",
840 if (!access(name, F_OK)) {
849 * Determine whether the specified message number is contained within the specified set.
850 * Returns nonzero if the specified message number is in the specified message set string.
852 int is_msg_in_mset(const char *mset, long msgnum) {
855 char setstr[SIZ], lostr[SIZ], histr[SIZ]; /* was 1024 */
859 * Now set it for all specified messages.
861 num_sets = num_tokens(mset, ',');
862 for (s=0; s<num_sets; ++s) {
863 extract_token(setstr, mset, s, ',', sizeof setstr);
865 extract_token(lostr, setstr, 0, ':', sizeof lostr);
866 if (num_tokens(setstr, ':') >= 2) {
867 extract_token(histr, setstr, 1, ':', sizeof histr);
868 if (!strcmp(histr, "*")) {
869 snprintf(histr, sizeof histr, "%ld", LONG_MAX);
873 strcpy(histr, lostr);
878 if ((msgnum >= lo) && (msgnum <= hi)) return(1);
886 * searches for a pattern within a search string
887 * returns position in string
889 int pattern2(char *search, char *patn)
893 len = strlen (search);
894 plen = strlen (patn);
895 for (a = 0; a < len; ++a) {
896 if (!strncasecmp(&search[a], patn, plen))
904 * Strip leading and trailing spaces from a string; with premeasured and adjusted length.
905 * buf - the string to modify
906 * len - length of the string.
908 void stripltlen(char *buf, int *len)
911 if (*len == 0) return;
912 while ((*len > delta) && (isspace(buf[delta]))){
915 memmove (buf, &buf[delta], *len - delta + 1);
918 if (*len == 0) return;
919 while (isspace(buf[(*len) - 1])){
920 buf[--(*len)] = '\0';
925 * \brief detect whether this char starts a UTF-8 encoded char
926 * \param Char character to inspect
929 inline int Ctdl_IsUtf8SequenceStart(char Char)
931 /** 11??.???? indicates an UTF8 Sequence. */
932 return ((Char & 0xC0) != 0);
936 * \brief evaluate the length of an utf8 special character sequence
937 * \param Char the character to examine
938 * \returns width of utf8 chars in bytes
940 inline int Ctdl_GetUtf8SequenceLength(char Char)
945 while ((n < 8) && ((test & Char) != 0)) {
955 * \brief measure the number of glyphs in an UTF8 string...
956 * \param str string to measure
957 * \returns the length of str
959 int Ctdl_Utf8StrLen(char *str)
968 while (*aptr != '\0') {
969 if (Ctdl_IsUtf8SequenceStart(*aptr)){
970 m = Ctdl_GetUtf8SequenceLength(*aptr);
971 while ((m-- > 0) && (*aptr++ != '\0'))
984 * \brief cuts a string after maxlen glyphs
985 * \param str string to cut to maxlen glyphs
986 * \param maxlen how long may the string become?
987 * \returns pointer to maxlen or the end of the string
989 char *Ctdl_Utf8StrCut(char *str, int maxlen)
997 while (*aptr != '\0') {
998 if (Ctdl_IsUtf8SequenceStart(*aptr)){
999 m = Ctdl_GetUtf8SequenceLength(*aptr);
1000 while ((m-- > 0) && (*aptr++ != '\0'))
1017 * Convert all whitespace characters in a supplied string to underscores
1019 void convert_spaces_to_underscores(char *str)
1027 for (i=0; i<len; ++i) {
1028 if (isspace(str[i])) {