4 * This file contains functions which handle the mapping of Internet addresses
5 * to users on the Citadel system.
17 #include <sys/types.h>
19 #if TIME_WITH_SYS_TIME
20 # include <sys/time.h>
24 # include <sys/time.h>
33 #include <libcitadel.h>
36 #include "sysdep_decls.h"
37 #include "citserver.h"
41 #include "internet_addressing.h"
44 #include "parsedate.h"
47 #include "ctdl_module.h"
/*
 * Function form of the FindNextEnd() scan, kept only so that somebody who
 * needs to debug the RFC2047 parser can single-step through it.
 *
 * bptr	points at the "=?" which opens an encoded word
 *	("=?charset?B-or-Q?text?=").  A NULL pointer is ignored.
 * end	local scratch variable for the position of the closing "?=".
 *
 * NOTE: 'end' is passed by value, so the position that is found here never
 * reaches the caller.  Code which needs the result has to use the
 * FindNextEnd() macro instead.
 *
 * The function is 'static inline' because a plain 'inline' definition does
 * not provide an external definition in C99/C11, which makes any call that
 * is not inlined fail at link time.
 */
static inline void FindNextEnd (char *bptr, char *end)
{
	if (bptr == NULL) return;

	/* Find the next ?Q? */
	end = strchr(bptr + 2, '?');
	if (end == NULL) return;	/* void function: nothing to hand back */
	if (((*(end + 1) == 'B') || (*(end + 1) == 'Q')) &&
	    (*(end + 2) == '?')) {
		/* skip on to the end of the cluster, the next ?= */
		end = strstr(end + 3, "?=");
	}
	else {
		/* sort of half valid encoding, try to find an end. */
		end = strstr(bptr, "?=");
	}
	(void)end;	/* result is intentionally discarded, see NOTE above */
}
/*
 * FindNextEnd(bptr, end)
 *
 * bptr	expression yielding a char pointer to the "=?" which opens an RFC2047
 *	encoded word ("=?charset?B-or-Q?text?=").  It is evaluated exactly
 *	once, so a conditional expression may be passed safely.
 * end	char pointer lvalue; receives the position of the "?=" which closes
 *	the encoded word, or NULL if bptr is NULL, is shorter than two
 *	characters, or no terminator can be found.
 *
 * If the "?B?" / "?Q?" encoding marker is present, the search for "?="
 * starts behind it, so that a quoted-printable payload which begins with
 * '=' is not mistaken for the terminator.  Otherwise the word is only half
 * valid and the first "?=" after bptr is taken.
 */
#define FindNextEnd(bptr, end) do { \
	char *fne_base_ = (bptr); \
	char *fne_mark_ = NULL; \
	if ((fne_base_ != NULL) && (fne_base_[0] != 0) && (fne_base_[1] != 0)) { \
		fne_mark_ = strchr(fne_base_ + 2, '?'); \
	} \
	if (fne_mark_ != NULL) { \
		if (((fne_mark_[1] == 'B') || (fne_mark_[1] == 'Q')) && (fne_mark_[2] == '?')) { \
			fne_mark_ = strstr(fne_mark_ + 3, "?="); \
		} else { \
			fne_mark_ = strstr(fne_base_, "?="); \
		} \
	} \
	(end) = fne_mark_; \
} while (0)
// utf8ify_rfc822_string(): rewrite the header string in buf, in place, as UTF-8.
// This listing omits a number of lines (declarations, braces, some statements),
// so the notes below describe only what the visible lines show:
//   1. buf is scanned for bytes outside 32..126; if one is found the string is
//      treated as iso-8859-1 text and pushed through iconv to UTF-8.
//   2. pairs of adjacent encoded words (=?...?=) are located; judging by the
//      existing comments the blank-only gap between two words is removed.
//   3. each encoded word is split into charset, encoding and text, decoded
//      (B is base64, Q is quoted-printable, anything else is copied as is) and
//      converted to UTF-8. A second branch substitutes the text (unreadable),
//      presumably when the charset cannot be opened - confirm.
//   The rewrite loop gives up after more than 20 passes.
85 * Handle subjects with RFC2047 encoding such as:
86 * =?koi8-r?B?78bP0s3Mxc7JxSDXz9rE1dvO2c3JINvB0sHNySDP?=
88 void utf8ify_rfc822_string(char *buf) {
89 char *start, *end, *next, *nextend, *ptr;
94 iconv_t ic = (iconv_t)(-1) ;
95 char *ibuf; /**< Buffer of characters to be converted */
96 char *obuf; /**< Buffer for converted characters */
97 size_t ibuflen; /**< Length of input buffer */
98 size_t obuflen; /**< Length of output buffer */
99 char *isav; /**< Saved pointer to input buffer */
100 char *osav; /**< Saved pointer to output buffer */
103 int illegal_non_rfc2047_encoding = 0;
105 /* Sometimes, badly formed messages contain strings which were simply
106 * written out directly in some foreign character set instead of
107 * using RFC2047 encoding. This is illegal but we will attempt to
108 * handle it anyway by converting from a user-specified default
109 * charset to UTF-8 if we see any nonprintable characters.
112 for (i=0; i<len; ++i) {
113 if ((buf[i] < 32) || (buf[i] > 126)) {
114 illegal_non_rfc2047_encoding = 1;
115 i = len; ///< take a shortcut, it won't be more than one.
118 if (illegal_non_rfc2047_encoding) {
119 const char *default_header_charset = "iso-8859-1";
120 if ( (strcasecmp(default_header_charset, "UTF-8")) && (strcasecmp(default_header_charset, "us-ascii")) ) {
121 ctdl_iconv_open("UTF-8", default_header_charset, &ic);
122 if (ic != (iconv_t)(-1) ) {
125 safestrncpy(ibuf, buf, 1024);
126 ibuflen = strlen(ibuf);
128 obuf = (char *) malloc(obuflen);
130 iconv(ic, &ibuf, &ibuflen, &obuf, &obuflen);
131 osav[1024-obuflen] = 0;
140 /* pre evaluate the first pair */
141 nextend = end = NULL;
143 start = strstr(buf, "=?");
145 FindNextEnd (start, end);
147 while ((start != NULL) && (end != NULL))
149 next = strstr(end, "=?");
151 FindNextEnd(next, nextend);
155 /* did we find two partitions */
156 if ((next != NULL) &&
160 while ((ptr < next) &&
166 /* did we find a gap just filled with blanks? */
171 len - (next - start));
173 /* now terminate the gap at the end */
174 delta = (next - end) - 2;
178 /* move next to its new location. */
183 /* our next-pair is our new first pair now. */
188 /* Now we handle foreign character sets properly encoded
191 start = strstr(buf, "=?");
// NOTE(review): the first macro argument below is a conditional expression.
// FindNextEnd expands its parameter as bptr + 2, so unless the macro
// parenthesizes the parameter the + 2 binds only to the else-arm (buf).
192 FindNextEnd((start != NULL)? start : buf, end);
193 while (start != NULL && end != NULL && end > start)
195 extract_token(charset, start, 1, '?', sizeof charset);
196 extract_token(encoding, start, 2, '?', sizeof encoding);
197 extract_token(istr, start, 3, '?', sizeof istr);
201 if (!strcasecmp(encoding, "B")) { /**< base64 */
202 ibuflen = CtdlDecodeBase64(ibuf, istr, strlen(istr));
204 else if (!strcasecmp(encoding, "Q")) { /**< quoted-printable */
212 if (istr[pos] == '_') istr[pos] = ' ';
216 ibuflen = CtdlDecodeQuotedPrintable(ibuf, istr, len);
219 strcpy(ibuf, istr); /**< unknown encoding */
220 ibuflen = strlen(istr);
223 ctdl_iconv_open("UTF-8", charset, &ic);
224 if (ic != (iconv_t)(-1) ) {
226 obuf = (char *) malloc(obuflen);
// iconv advances ibuf and obuf and decrements both length counters, so the
// terminator index 1024-obuflen presumably relies on obuflen having started
// at 1024 and on osav holding the start of the output buffer; both
// assignments are on lines missing from this listing - confirm.
228 iconv(ic, &ibuf, &ibuflen, &obuf, &obuflen);
229 osav[1024-obuflen] = 0;
// NOTE(review): strcpy(end, &end[1]) below copies between overlapping
// regions, which the C standard leaves undefined; memmove is the
// well-defined way to shift a string left by one character.
234 remove_token(end, 0, '?');
235 remove_token(end, 0, '?');
236 remove_token(end, 0, '?');
237 remove_token(end, 0, '?');
238 strcpy(end, &end[1]);
240 snprintf(newbuf, sizeof newbuf, "%s%s%s", buf, osav, end);
249 remove_token(end, 0, '?');
250 remove_token(end, 0, '?');
251 remove_token(end, 0, '?');
252 remove_token(end, 0, '?');
253 strcpy(end, &end[1]);
255 snprintf(newbuf, sizeof newbuf, "%s(unreadable)%s", buf, end);
262 * Since spammers will go to all sorts of absurd lengths to get their
263 * messages through, there are LOTS of corrupt headers out there.
264 * So, prevent a really badly formed RFC2047 header from throwing
265 * this function into an infinite loop.
268 if (passes > 20) return;
270 start = strstr(buf, "=?");
271 FindNextEnd((start != NULL)? start : buf, end);
// No-op variant of utf8ify_rfc822_string(): the string is left untouched.
// Presumably selected by a preprocessor conditional when iconv is not
// available; the conditional itself is not part of this listing - confirm.
276 inline void utf8ify_rfc822_string(char *a){};
// Text of the Internet configuration. CtdlHostAlias() reads it as
// newline-separated records of the form host|type and treats NULL as
// no configuration loaded.
287 char *inetcfg = NULL;
// Pointer to the spam-string data; struct spamstrings_t is declared outside
// this listing. Starts out NULL.
288 struct spamstrings_t *spamstrings = NULL;
// CtdlHostAlias(): classify a host name.
//   fqdn    - host name to test. NULL or an empty string gives hostalias_nomatch.
//   returns - hostalias_localhost when fqdn equals localhost, config.c_fqdn or
//             config.c_nodename (compared without regard to case);
//             otherwise the value set by the first inetcfg record whose host
//             equals fqdn and whose type is masqdomain, localhost or directory;
//             hostalias_nomatch when inetcfg is NULL or nothing matches.
292 * Return nonzero if the supplied name is an alias for this host.
294 int CtdlHostAlias(char *fqdn) {
298 char host[256], type[256];
301 if (fqdn == NULL) return(hostalias_nomatch);
302 if (IsEmptyStr(fqdn)) return(hostalias_nomatch);
303 if (!strcasecmp(fqdn, "localhost")) return(hostalias_localhost);
304 if (!strcasecmp(fqdn, config.c_fqdn)) return(hostalias_localhost);
305 if (!strcasecmp(fqdn, config.c_nodename)) return(hostalias_localhost);
306 if (inetcfg == NULL) return(hostalias_nomatch);
// inetcfg is walked one newline-separated record at a time; each record is
// split on the pipe character into host (field 0) and type (field 1).
308 config_lines = num_tokens(inetcfg, '\n');
309 for (i=0; i<config_lines; ++i) {
310 extract_token(buf, inetcfg, i, '\n', sizeof buf);
311 extract_token(host, buf, 0, '|', sizeof host);
312 extract_token(type, buf, 1, '|', sizeof type);
316 /* Process these in a specific order, in case there are multiple matches.
317 * We want directory to override masq, for example.
320 if ( (!strcasecmp(type, "masqdomain")) && (!strcasecmp(fqdn, host))) {
321 found = hostalias_masq;
323 if ( (!strcasecmp(type, "localhost")) && (!strcasecmp(fqdn, host))) {
324 found = hostalias_localhost;
326 if ( (!strcasecmp(type, "directory")) && (!strcasecmp(fqdn, host))) {
327 found = hostalias_directory;
// NOTE(review): this returns as soon as one record has set found, so the
// override order of the three tests only matters within a single record.
// A masqdomain record placed before a directory record for the same host
// would win - confirm this is intended.
330 if (found) return(found);
333 return(hostalias_nomatch);
// fuzzy_match(): compare matchstring against one user record.
//   us          - user record; usernum and fullname are read.
//   matchstring - text to look for.
// Two tests are visible in this listing: matchstring starting with cit
// (any case) followed by a number equal to us->usernum (parsed with atol),
// and a case-insensitive comparison of matchstring at every offset of
// us->fullname. The return statements are not part of this listing; the
// existing header comment states that 0 means a match.
343 * Return 0 if a given string fuzzy-matches a Citadel user account
345 * FIXME ... this needs to be updated to handle aliases.
347 int fuzzy_match(struct ctdluser *us, char *matchstring) {
351 if ( (!strncasecmp(matchstring, "cit", 3))
352 && (atol(&matchstring[3]) == us->usernum)) {
356 len = strlen(matchstring);
357 for (a=0; !IsEmptyStr(&us->fullname[a]); ++a) {
358 if (!strncasecmp(&us->fullname[a],
/*
 * Unfold a multi-line field into a single line, removing multi-whitespaces
 *
 * field	NUL-terminated header value, modified in place.  A NULL
 *		pointer is ignored.
 *
 * Leading and trailing whitespace is stripped.  Outside of double quotes
 * every whitespace character (tab, CR, LF, ...) becomes a blank and runs of
 * whitespace collapse into a single blank.  Text between double quotes is
 * left untouched.
 */
void unfold_rfc822_field(char *field) {
	size_t len;
	size_t i;
	int quote = 0;

	if (field == NULL) return;

	striplt(field);		/* remove leading/trailing whitespace */
	len = strlen(field);	/* tracked by hand so strlen() is not re-run per character */

	/* convert non-space whitespace to spaces, and remove double blanks */
	for (i = 0; i < len; ++i) {
		if (field[i] == '\"') quote = 1 - quote;
		if (!quote) {
			/* <ctype.h> functions need an unsigned char value; a negative
			 * plain char (any byte >= 0x80) would be undefined behaviour.
			 */
			if (isspace((unsigned char)field[i])) field[i] = ' ';
			while (isspace((unsigned char)field[i])
			   && isspace((unsigned char)field[i+1])) {
				/* Source and destination overlap, so this has to be
				 * memmove(); strcpy() is undefined for overlapping
				 * objects.  The count includes the terminating NUL.
				 */
				memmove(&field[i+1], &field[i+2], len - i - 1);
				--len;
			}
		}
	}
}
// process_rfc822_addr(): break an RFC822 style address into three parts.
//   rfc822 - address text; when NULL the function returns right after the
//            defaults have been set (node is set to config.c_fqdn on a
//            visible line; other defaults are not part of this listing).
//   user   - out: mailbox name, taken from inside angle brackets when one
//            pair is present, with parenthesised text removed.
//   node   - out: host part; config.c_nodename when the address holds none
//            of the characters at-sign, percent or bang.
//   name   - out: display name. Parenthesised text replaces the default and
//            a quoted string (exactly two double quotes) replaces both.
// All three outputs are filled with strcpy from rfc822 and no buffer length
// is passed, so each must be able to hold the complete input string.
// NOTE(review): the shift-left idiom strcpy(x, &x[1]) used throughout copies
// between overlapping regions, which the C standard leaves undefined;
// memmove is the well-defined form.
391 * Split an RFC822-style address into userid, host, and full name
394 void process_rfc822_addr(const char *rfc822, char *user, char *node, char *name)
399 strcpy(node, config.c_fqdn);
402 if (rfc822 == NULL) return;
404 /* extract full name - first, it's From minus <userid> */
405 strcpy(name, rfc822);
406 stripout(name, '<', '>');
408 /* strip anything to the left of a bang */
409 while ((!IsEmptyStr(name)) && (haschar(name, '!') > 0))
410 strcpy(name, &name[1]);
412 /* and anything to the right of a @ or % */
413 for (a = 0; a < strlen(name); ++a) {
420 /* but if there are parentheses, that changes the rules... */
421 if ((haschar(rfc822, '(') == 1) && (haschar(rfc822, ')') == 1)) {
422 strcpy(name, rfc822);
423 stripallbut(name, '(', ')');
426 /* but if there are a set of quotes, that supersedes everything */
// 34 is the ASCII code of the double quote character.
427 if (haschar(rfc822, 34) == 2) {
428 strcpy(name, rfc822);
429 while ((!IsEmptyStr(name)) && (name[0] != 34)) {
430 strcpy(&name[0], &name[1]);
432 strcpy(&name[0], &name[1]);
433 for (a = 0; a < strlen(name); ++a)
437 /* extract user id */
438 strcpy(user, rfc822);
440 /* first get rid of anything in parens */
441 stripout(user, '(', ')');
443 /* if there's a set of angle brackets, strip it down to that */
444 if ((haschar(user, '<') == 1) && (haschar(user, '>') == 1)) {
445 stripallbut(user, '<', '>');
448 /* strip anything to the left of a bang */
449 while ((!IsEmptyStr(user)) && (haschar(user, '!') > 0))
450 strcpy(user, &user[1]);
452 /* and anything to the right of a @ or % */
453 for (a = 0; a < strlen(user); ++a) {
461 /* extract node name */
462 strcpy(node, rfc822);
464 /* first get rid of anything in parens */
465 stripout(node, '(', ')');
467 /* if there's a set of angle brackets, strip it down to that */
468 if ((haschar(node, '<') == 1) && (haschar(node, '>') == 1)) {
469 stripallbut(node, '<', '>');
472 /* If no node specified, tack ours on instead */
474 (haschar(node, '@')==0)
475 && (haschar(node, '%')==0)
476 && (haschar(node, '!')==0)
478 strcpy(node, config.c_nodename);
483 /* strip anything to the left of a @ */
484 while ((!IsEmptyStr(node)) && (haschar(node, '@') > 0))
485 strcpy(node, &node[1]);
487 /* strip anything to the left of a % */
488 while ((!IsEmptyStr(node)) && (haschar(node, '%') > 0))
489 strcpy(node, &node[1]);
491 /* reduce multiple system bang paths to node!user */
492 while ((!IsEmptyStr(node)) && (haschar(node, '!') > 1))
493 strcpy(node, &node[1]);
495 /* now get rid of the user portion of a node!user string */
496 for (a = 0; a < strlen(node); ++a)
501 /* strip leading and trailing spaces in all strings */
506 /* If we processed a string that had the address in angle brackets
507 * but no name outside the brackets, we now have an empty name. In
508 * this case, use the user portion of the address as the name.
510 if ((IsEmptyStr(name)) && (!IsEmptyStr(user))) {
// convert_field(): look at one header line of msg->cm_fields['M'].
//   msg      - message being built; its M field holds the RFC822 text.
//   beg, end - offsets of the first and last character of the header line.
//   returns  - 0 when the line holds no colon; for the other paths see the
//              existing header comment (the return statements are not part
//              of this listing).
// The line is copied, cut at the colon into key and value, and the value is
// unfolded. Visible key to field mapping:
//   Date        -> T  (unix time as decimal text; current time if parsing fails)
//   From        -> A  (display name) and F (user@node)
//   Subject     -> U      List-ID     -> L
//   To          -> R      CC          -> Y
//   Message-ID  -> I  (text up to the last opening angle bracket removed,
//                      then cut at the first closing angle bracket)
//   Return-Path -> P      Envelope-To -> V
//   References  -> W  (always replaces an existing W)
//   In-reply-to -> W  (only when W is still empty)
// Apart from References, a field is only stored when its slot is still NULL.
518 * convert_field() is a helper function for convert_internet_message().
519 * Given start/end positions for an rfc822 field, it converts it to a Citadel
520 * field if it wants to, and unfolds it if necessary.
522 * Returns 1 if the field was converted and inserted into the Citadel message
523 * structure, implying that the source field should be removed from the
526 int convert_field(struct CtdlMessage *msg, int beg, int end) {
539 rfc822 = msg->cm_fields['M']; /* M field contains rfc822 text */
// Scanning backwards leaves colonpos at the first colon inside beg..end.
540 for (i = end; i >= beg; --i) {
541 if (rfc822[i] == ':') colonpos = i;
544 if (colonpos < 0) return(0); /* no colon? not a valid header line */
// key and value share one allocation: the colon is overwritten with NUL and
// value points at the character after it.
// NOTE(review): the malloc result is passed to safestrncpy on the very next
// line without a NULL check.
546 key = malloc((end - beg) + 2);
547 safestrncpy(key, &rfc822[beg], (end-beg)+1);
548 key[colonpos - beg] = 0;
549 value = &key[(colonpos - beg) + 1];
550 unfold_rfc822_field(value);
553 * Here's the big rfc822-to-citadel loop.
556 /* Date/time is converted into a unix timestamp. If the conversion
557 * fails, we replace it with the time the message arrived locally.
559 if (!strcasecmp(key, "Date")) {
560 parsed_date = parsedate(value);
561 if (parsed_date < 0L) parsed_date = time(NULL);
562 snprintf(buf, sizeof buf, "%ld", (long)parsed_date );
563 if (msg->cm_fields['T'] == NULL)
564 msg->cm_fields['T'] = strdup(buf);
568 else if (!strcasecmp(key, "From")) {
569 process_rfc822_addr(value, user, node, name);
570 CtdlLogPrintf(CTDL_DEBUG, "Converted to <%s@%s> (%s)\n", user, node, name);
571 snprintf(addr, sizeof addr, "%s@%s", user, node);
572 if (msg->cm_fields['A'] == NULL)
573 msg->cm_fields['A'] = strdup(name);
575 if (msg->cm_fields['F'] == NULL)
576 msg->cm_fields['F'] = strdup(addr);
580 else if (!strcasecmp(key, "Subject")) {
581 if (msg->cm_fields['U'] == NULL)
582 msg->cm_fields['U'] = strdup(value);
586 else if (!strcasecmp(key, "List-ID")) {
587 if (msg->cm_fields['L'] == NULL)
588 msg->cm_fields['L'] = strdup(value);
592 else if (!strcasecmp(key, "To")) {
593 if (msg->cm_fields['R'] == NULL)
594 msg->cm_fields['R'] = strdup(value);
598 else if (!strcasecmp(key, "CC")) {
599 if (msg->cm_fields['Y'] == NULL)
600 msg->cm_fields['Y'] = strdup(value);
604 else if (!strcasecmp(key, "Message-ID")) {
605 if (msg->cm_fields['I'] != NULL) {
606 CtdlLogPrintf(CTDL_WARNING, "duplicate message id\n");
609 if (msg->cm_fields['I'] == NULL) {
610 msg->cm_fields['I'] = strdup(value);
612 /* Strip angle brackets */
613 while (haschar(msg->cm_fields['I'], '<') > 0) {
614 strcpy(&msg->cm_fields['I'][0],
615 &msg->cm_fields['I'][1]);
617 for (i = 0; i<strlen(msg->cm_fields['I']); ++i)
618 if (msg->cm_fields['I'][i] == '>')
619 msg->cm_fields['I'][i] = 0;
625 else if (!strcasecmp(key, "Return-Path")) {
626 if (msg->cm_fields['P'] == NULL)
627 msg->cm_fields['P'] = strdup(value);
631 else if (!strcasecmp(key, "Envelope-To")) {
632 if (msg->cm_fields['V'] == NULL)
633 msg->cm_fields['V'] = strdup(value);
637 else if (!strcasecmp(key, "References")) {
638 if (msg->cm_fields['W'] != NULL) {
639 free(msg->cm_fields['W']);
641 msg->cm_fields['W'] = strdup(value);
645 else if (!strcasecmp(key, "In-reply-to")) {
646 if (msg->cm_fields['W'] == NULL) { /* References: supersedes In-reply-to: */
647 msg->cm_fields['W'] = strdup(value);
654 /* Clean up and move on. */
655 free(key); /* Don't free 'value', it's actually the same buffer */
// convert_references_to_wefewences(): rewrites str in place.
// Only scattered lines of the loop are part of this listing. What they show:
// bracket_nesting counts open angle brackets and is clamped at zero, a
// closing bracket at depth zero that is neither the first nor the last
// character gets special handling, and characters are shifted left one
// position through moveptr. The resulting output format is not visible
// here - see the function that consumes the W field.
661 * Convert RFC822 references format (References) to Citadel references format (Weferences)
663 void convert_references_to_wefewences(char *str) {
664 int bracket_nesting = 0;
666 char *moveptr = NULL;
673 if (bracket_nesting < 0) bracket_nesting = 0;
675 if ((ch == '>') && (bracket_nesting == 0) && (*(ptr+1)) && (ptr>str) ) {
679 else if (bracket_nesting > 0) {
685 *moveptr = *(moveptr+1);
689 if (ch == '<') ++bracket_nesting;
// convert_internet_message(): wrap an RFC822 text in a new CtdlMessage.
//   rfc822  - complete message text; stored as field M of the new message,
//             so the message owns it afterwards (see the existing note).
//   returns - the new message, or NULL when malloc fails. In the failure
//             case rfc822 has not yet been stored anywhere by this function.
// The structure is zeroed, then cm_magic, cm_anon_type (0) and
// cm_format_type (FMT_RFC822) are set. The header is walked line by line,
// treating a line that starts with whitespace as a continuation; each field
// is offered to convert_field() and, if converted, cut out of the text.
// Afterwards field T defaults to the current time and an existing field W
// is passed to convert_references_to_wefewences().
696 * Convert an RFC822 message (headers + body) to a CtdlMessage structure.
697 * NOTE: the supplied buffer becomes part of the CtdlMessage structure, and
698 * will be deallocated when CtdlFreeMessage() is called. Therefore, the
699 * supplied buffer should be DEREFERENCED. It should not be freed or used
702 struct CtdlMessage *convert_internet_message(char *rfc822) {
704 struct CtdlMessage *msg;
705 int pos, beg, end, msglen;
710 msg = malloc(sizeof(struct CtdlMessage));
711 if (msg == NULL) return msg;
713 memset(msg, 0, sizeof(struct CtdlMessage));
714 msg->cm_magic = CTDLMESSAGE_MAGIC; /* self check */
715 msg->cm_anon_type = 0; /* never anonymous */
716 msg->cm_format_type = FMT_RFC822; /* internet message */
717 msg->cm_fields['M'] = rfc822;
724 /* Locate beginning and end of field, keeping in mind that
725 * some fields might be multiline
730 msglen = strlen(rfc822);
731 while ( (end < 0) && (done == 0) ) {
733 if ((rfc822[pos]=='\n')
734 && (!isspace(rfc822[pos+1]))) {
738 /* done with headers? */
739 if ( (rfc822[pos]=='\n')
740 && ( (rfc822[pos+1]=='\n')
741 ||(rfc822[pos+1]=='\r')) ) {
746 if (pos >= (msglen-1) ) {
755 /* At this point we have a field. Are we interested in it? */
756 converted = convert_field(msg, beg, end);
758 /* Strip the field out of the RFC822 header if we used it */
// NOTE(review): source and destination of this strcpy overlap, which the C
// standard leaves undefined; memmove is the well-defined form.
760 strcpy(&rfc822[beg], &rfc822[pos]);
764 /* If we've hit the end of the message, bail out */
765 if (pos > strlen(rfc822)) done = 1;
768 /* Follow-up sanity checks... */
770 /* If there's no timestamp on this message, set it to now. */
771 if (msg->cm_fields['T'] == NULL) {
772 snprintf(buf, sizeof buf, "%ld", (long)time(NULL));
773 msg->cm_fields['T'] = strdup(buf);
776 /* If a W (references, or rather, Wefewences) field is present, we
777 * have to convert it from RFC822 format to Citadel format.
779 if (msg->cm_fields['W'] != NULL) {
780 convert_references_to_wefewences(msg->cm_fields['W']);
// rfc822_fetch_field(): fetch the value of one header field.
//   rfc822    - message text; NULL gives NULL.
//   fieldname - header name without the colon; NULL gives NULL.
//   returns   - per the existing header comment a buffer the caller must
//               free, or NULL. NULL is visibly returned when no blank line
//               ends the header or when the field is not found before it.
// The buffer is SIZ bytes. The first line is read with memreadline(); every
// following line that starts with whitespace is appended after one blank.
// Finally the leading name and colon are removed by shifting the text left.
789 * Look for a particular header field in an RFC822 message text. If the
790 * requested field is found, it is unfolded (if necessary) and returned to
791 * the caller. The field name is stripped out, leaving only its contents.
792 * The caller is responsible for freeing the returned buffer. If the requested
793 * field is not present, or anything else goes wrong, it returns NULL.
795 char *rfc822_fetch_field(char *rfc822, char *fieldname) {
796 char *fieldbuf = NULL;
797 char *end_of_headers;
803 /* Should never happen, but sometimes we get stupid */
804 if (rfc822 == NULL) return(NULL);
805 if (fieldname == NULL) return(NULL);
807 snprintf(fieldhdr, sizeof fieldhdr, "%s:", fieldname);
809 /* Locate the end of the headers, so we don't run past that point */
810 end_of_headers = bmstrcasestr(rfc822, "\n\r\n");
811 if (end_of_headers == NULL) {
812 end_of_headers = bmstrcasestr(rfc822, "\n\n");
814 if (end_of_headers == NULL) return (NULL);
// NOTE(review): the name is searched anywhere in the text, presumably
// without regard to case, and is not anchored to the start of a line; a
// longer header name that ends in the same text would match too - confirm.
816 field_start = bmstrcasestr(rfc822, fieldhdr);
817 if (field_start == NULL) return(NULL);
818 if (field_start > end_of_headers) return(NULL);
// NOTE(review): the malloc result is written to on the next line without a
// NULL check.
820 fieldbuf = malloc(SIZ);
821 strcpy(fieldbuf, "");
824 ptr = memreadline(ptr, fieldbuf, SIZ-strlen(fieldbuf) );
825 while ( (isspace(ptr[0])) && (ptr < end_of_headers) ) {
826 strcat(fieldbuf, " ");
827 cont = &fieldbuf[strlen(fieldbuf)];
828 ptr = memreadline(ptr, cont, SIZ-strlen(fieldbuf) );
// NOTE(review): overlapping strcpy; memmove is the well-defined form.
832 strcpy(fieldbuf, &fieldbuf[strlen(fieldhdr)]);
840 /*****************************************************************************
841 * DIRECTORY MANAGEMENT FUNCTIONS *
842 *****************************************************************************/
// directory_key(): derive the directory lookup key for an address.
//   key  - out: caller-supplied buffer. No size is passed, so it has to be
//          able to hold a copy of addr.
//   addr - in: e-mail address.
// Every character of addr that is not whitespace is copied in lower case;
// whitespace is dropped. The finished key is written to the debug log.
845 * Generate the index key for an Internet e-mail address to be looked up
848 void directory_key(char *key, char *addr) {
852 for (i=0; !IsEmptyStr(&addr[i]); ++i) {
853 if (!isspace(addr[i])) {
854 key[keylen++] = tolower(addr[i]);
859 CtdlLogPrintf(CTDL_DEBUG, "Directory key is <%s>\n", key);
// IsDirectory(): test whether the domain of addr is one we keep a directory for.
//   addr               - e-mail address; the text after the first at-sign
//                        (token 1) is taken as the domain.
//   allow_masq_domains - when nonzero a masquerade domain counts as well.
// The domain is classified with CtdlHostAlias(); localhost and directory
// domains are accepted in either case. The return statements are not part
// of this listing; the existing comment states that nonzero means yes.
864 /* Return nonzero if the supplied address is in a domain we keep in
867 int IsDirectory(char *addr, int allow_masq_domains) {
871 extract_token(domain, addr, 1, '@', sizeof domain);
874 h = CtdlHostAlias(domain);
876 if ( (h == hostalias_masq) && allow_masq_domains)
879 if ( (h == hostalias_localhost) || (h == hostalias_directory) ) {
// CtdlDirectoryInit(): empties the directory database by calling
// cdb_trunc() on CDB_DIRECTORY. Takes no arguments and returns nothing.
889 * Initialize the directory database (erasing anything already there)
891 void CtdlDirectoryInit(void) {
892 cdb_trunc(CDB_DIRECTORY);
// CtdlDirectoryAddUser(): store a mapping from an Internet address to a
// Citadel address.
//   internet_addr - e-mail address; ignored unless IsDirectory() accepts its
//                   domain (masquerade domains are not accepted here).
//   citadel_addr  - value to store.
// The record key is the directory_key() form of internet_addr without its
// terminating NUL; the stored value is citadel_addr including its NUL.
897 * Add an Internet e-mail address to the directory for a user
899 void CtdlDirectoryAddUser(char *internet_addr, char *citadel_addr) {
902 if (IsDirectory(internet_addr, 0) == 0) return;
903 CtdlLogPrintf(CTDL_DEBUG, "Create directory entry: %s --> %s\n", internet_addr, citadel_addr);
904 directory_key(key, internet_addr);
905 cdb_store(CDB_DIRECTORY, key, strlen(key), citadel_addr, strlen(citadel_addr)+1 );
// CtdlDirectoryDelUser(): remove the directory record for internet_addr.
//   internet_addr - e-mail address whose directory_key() form is deleted.
//   citadel_addr  - only written to the debug log.
// No IsDirectory() check is visible here, in contrast to the add function.
910 * Delete an Internet e-mail address from the directory.
912 * (NOTE: we don't actually use or need the citadel_addr variable; it's merely
913 * here because the callback API expects to be able to send it.)
915 void CtdlDirectoryDelUser(char *internet_addr, char *citadel_addr) {
918 CtdlLogPrintf(CTDL_DEBUG, "Delete directory entry: %s --> %s\n", internet_addr, citadel_addr);
919 directory_key(key, internet_addr);
920 cdb_delete(CDB_DIRECTORY, key, strlen(key) );
// CtdlDirectoryLookup(): translate an Internet address to a Citadel address.
//   target        - out: receives a copy of internet_addr first, and the
//                   stored Citadel address when a record is found.
//   internet_addr - in: address to look up.
//   targbuflen    - size of target; both copies are bounded by it.
//   returns       - -1 when the address does not split into exactly two
//                   parts at the at-sign or its domain is not a directory
//                   domain; see the existing header comment for the rest.
// NOTE(review): the release of cdbrec is not part of this listing - confirm
// that the fetched record is freed after the copy.
925 * Look up an Internet e-mail address in the directory.
926 * On success: returns 0, and Citadel address stored in 'target'
927 * On failure: returns nonzero
929 int CtdlDirectoryLookup(char *target, char *internet_addr, size_t targbuflen) {
930 struct cdbdata *cdbrec;
933 /* Dump it in there unchanged, just for kicks */
934 safestrncpy(target, internet_addr, targbuflen);
936 /* Only do lookups for addresses with hostnames in them */
937 if (num_tokens(internet_addr, '@') != 2) return(-1);
939 /* Only do lookups for domains in the directory */
940 if (IsDirectory(internet_addr, 0) == 0) return(-1);
942 directory_key(key, internet_addr);
943 cdbrec = cdb_fetch(CDB_DIRECTORY, key, strlen(key) );
944 if (cdbrec != NULL) {
945 safestrncpy(target, cdbrec->ptr, targbuflen);
955 * Harvest any email addresses that someone might want to have in their
956 * "collected addresses" book.
958 char *harvest_collected_addresses(struct CtdlMessage *msg) {
961 char user[256], node[256], name[256];
966 if (msg == NULL) return(NULL);
970 if (msg->cm_fields['A'] != NULL) {
971 strcat(addr, msg->cm_fields['A']);
973 if (msg->cm_fields['F'] != NULL) {
975 strcat(addr, msg->cm_fields['F']);
977 if (IsDirectory(msg->cm_fields['F'], 0)) {
982 if (is_harvestable) {
989 if (coll == NULL) return(NULL);
991 /* Scan both the R (To) and Y (CC) fields */
992 for (i = 0; i < 2; ++i) {
993 if (i == 0) field = 'R' ;
994 if (i == 1) field = 'Y' ;
996 if (msg->cm_fields[field] != NULL) {
997 for (j=0; j<num_tokens(msg->cm_fields[field], ','); ++j) {
998 extract_token(addr, msg->cm_fields[field], j, ',', sizeof addr);
999 if (strstr(addr, "=?") != NULL)
1000 utf8ify_rfc822_string(addr);
1001 process_rfc822_addr(addr, user, node, name);
1002 h = CtdlHostAlias(node);
1003 if ( (h != hostalias_localhost) && (h != hostalias_directory) ) {
1004 coll = realloc(coll, strlen(coll) + strlen(addr) + 4);
1005 if (coll == NULL) return(NULL);
1006 if (!IsEmptyStr(coll)) {
1016 if (IsEmptyStr(coll)) {