1 // Copyright (c) 1996-2022 by the citadel.org team
3 // This program is open source software. Use, duplication, or disclosure
4 // are subject to the terms of the GNU General Public License v3.
11 #include <sys/types.h>
18 #include "libcitadel.h"
20 #if TIME_WITH_SYS_TIME
21 # include <sys/time.h>
26 // This is the non-define version in case it is needed for debugging
// Function form of the FindNextEnd search, kept for debugging.
// Visible logic: look for the first '?' at or after bptr + 2; if that '?' is
// followed by 'B' or 'Q' and then another '?', search for the closing "?="
// starting three characters further on. A second search for "?=" starting at
// bptr itself is also present as the fallback for malformed input.
// NOTE(review): `end` is received by value, so none of the assignments below
// are visible to the caller -- the result of the search is lost.
// NOTE(review): `return NULL;` is used although the function is declared
// void; confirm this block is excluded from compilation.
// NOTE(review): the `else` and the closing braces are not visible in this
// view of the file, so the exact branch structure should be confirmed.
28 inline void FindNextEnd (char *bptr, char *end) {
29 /* Find the next ?Q? */
30 end = strchr(bptr + 2, '?');
31 if (end == NULL) return NULL;
32 if (((*(end + 1) == 'B') || (*(end + 1) == 'Q')) &&
33 (*(end + 2) == '?')) {
34 /* skip on to the end of the cluster, the next ?= */
35 end = strstr(end + 3, "?=");
38 /* sort of half valid encoding, try to find an end. */
39 end = strstr(bptr, "?=");
// Macro form of FindNextEnd. `end` must be an assignable pointer variable;
// the macro stores the search result directly into it.
//   1. end = first '?' found at or after bptr + 2.
//   2. If that '?' is followed by 'B' or 'Q' and then another '?', end becomes
//      the next "?=" found from end + 3 onward.
//   3. Otherwise end becomes the first "?=" found from bptr onward.
// Either search may leave end == NULL when nothing is found.
// NOTE(review): the arguments are substituted textually without parentheses
// and are expanded more than once. Passing an expression such as
// `cond ? a : b` for bptr turns `bptr + 2` into `cond ? a : b + 2`, so the
// offset is applied to only one arm. Pass a plain variable, or wrap the
// argument in parentheses at the call site.
// NOTE(review): whether the strchr() result is checked for NULL before it is
// dereferenced is not visible in this view of the macro -- confirm.
43 #define FindNextEnd(bptr, end) { \
44 end = strchr(bptr + 2, '?'); \
46 if (((*(end + 1) == 'B') || (*(end + 1) == 'Q')) && (*(end + 2) == '?')) { \
47 end = strstr(end + 3, "?="); \
48 } else end = strstr(bptr, "?="); \
52 // Handle subjects with RFC2047 encoding such as:
53 // =?koi8-r?B?78bP0s3Mxc7JxSDXz9rE1dvO2c3JINvB0sHNySDP?=
54 void utf8ify_rfc822_string(char *buf) {
55 char *start, *end, *next, *nextend, *ptr;
60 iconv_t ic = (iconv_t)(-1) ;
61 char *ibuf; // Buffer of characters to be converted
62 char *obuf; // Buffer for converted characters
63 size_t ibuflen; // Length of input buffer
64 size_t obuflen; // Length of output buffer
65 char *isav; // Saved pointer to input buffer
66 char *osav; // Saved pointer to output buffer
69 int illegal_non_rfc2047_encoding = 0;
71 // Sometimes, badly formed messages contain strings which were simply
72 // written out directly in some foreign character set instead of
73 // using RFC2047 encoding. This is illegal but we will attempt to
74 // handle it anyway by converting from a user-specified default
75 // charset to UTF-8 if we see any nonprintable characters.
77 for (i=0; i<len; ++i) {
78 if ((buf[i] < 32) || (buf[i] > 126)) {
79 illegal_non_rfc2047_encoding = 1;
80 i = len; // take a shortcut, it won't be more than one.
83 if (illegal_non_rfc2047_encoding) {
84 const char *default_header_charset = "iso-8859-1";
85 if ( (strcasecmp(default_header_charset, "UTF-8")) && (strcasecmp(default_header_charset, "us-ascii")) ) {
86 ctdl_iconv_open("UTF-8", default_header_charset, &ic);
87 if (ic != (iconv_t)(-1) ) {
90 safestrncpy(ibuf, buf, 1024);
91 ibuflen = strlen(ibuf);
93 obuf = (char *) malloc(obuflen);
95 iconv(ic, &ibuf, &ibuflen, &obuf, &obuflen);
96 osav[1024-obuflen] = 0;
105 // pre evaluate the first pair
106 nextend = end = NULL;
108 start = strstr(buf, "=?");
110 FindNextEnd (start, end);
112 while ((start != NULL) && (end != NULL)) {
113 next = strstr(end, "=?");
115 FindNextEnd(next, nextend);
119 // did we find two partitions
120 if ((next != NULL) && ((next - end) > 2)) {
122 while ((ptr < next) &&
128 // did we find a gap just filled with blanks?
130 memmove(end + 2, next, len - (next - start));
132 // now terminate the gap at the end
133 delta = (next - end) - 2;
137 // move next to its new location.
142 // our next-pair is our new first pair now.
147 // Now we handle foreign character sets properly encoded in RFC2047 format.
148 start = strstr(buf, "=?");
149 FindNextEnd((start != NULL)? start : buf, end);
150 while (start != NULL && end != NULL && end > start) {
151 extract_token(charset, start, 1, '?', sizeof charset);
152 extract_token(encoding, start, 2, '?', sizeof encoding);
153 extract_token(istr, start, 3, '?', sizeof istr);
157 if (!strcasecmp(encoding, "B")) { // base64
158 ibuflen = CtdlDecodeBase64(ibuf, istr, strlen(istr));
160 else if (!strcasecmp(encoding, "Q")) { // quoted-printable
167 if (istr[pos] == '_') istr[pos] = ' ';
170 ibuflen = CtdlDecodeQuotedPrintable(ibuf, istr, len);
173 strcpy(ibuf, istr); // unknown encoding
174 ibuflen = strlen(istr);
177 ctdl_iconv_open("UTF-8", charset, &ic);
178 if (ic != (iconv_t)(-1) ) {
180 obuf = (char *) malloc(obuflen);
182 iconv(ic, &ibuf, &ibuflen, &obuf, &obuflen);
183 osav[1024-obuflen] = 0;
188 remove_token(end, 0, '?');
189 remove_token(end, 0, '?');
190 remove_token(end, 0, '?');
191 remove_token(end, 0, '?');
192 strcpy(end, &end[1]);
194 snprintf(newbuf, sizeof newbuf, "%s%s%s", buf, osav, end);
203 remove_token(end, 0, '?');
204 remove_token(end, 0, '?');
205 remove_token(end, 0, '?');
206 remove_token(end, 0, '?');
207 strcpy(end, &end[1]);
209 snprintf(newbuf, sizeof newbuf, "%s(unreadable)%s", buf, end);
215 // Since spammers will go to all sorts of absurd lengths to get their
216 // messages through, there are LOTS of corrupt headers out there.
217 // So, prevent a really badly formed RFC2047 header from throwing
218 // this function into an infinite loop.
220 if (passes > 20) return;
222 start = strstr(buf, "=?");
223 FindNextEnd((start != NULL)? start : buf, end);