1 // Copyright (c) 1996-2022 by the citadel.org team
3 // This program is open source software. Use, duplication, or disclosure
4 // are subject to the terms of the GNU General Public License v3.
11 #include <sys/types.h>
19 #include "libcitadel.h"
22 // This is the non-define version in case it is needed for debugging
// Function form of the FindNextEnd search, kept only as a debugging aid.
// Starting two bytes past bptr it looks for the next '?'; if that '?' is followed by
// 'B' or 'Q' and another '?', it searches for the closing "?=" after that marker,
// otherwise it searches for "?=" from bptr itself.
// NOTE(review): as written the result cannot reach the caller -- `end` is a by-value
// parameter, and `return NULL` appears in a function declared void.
// NOTE(review): the closing lines of this definition are not visible in this excerpt;
// confirm against the full file before enabling it.
24 inline void FindNextEnd (char *bptr, char *end) {
25 /* Find the next ?Q? */
26 end = strchr(bptr + 2, '?');
27 if (end == NULL) return NULL;
28 if (((*(end + 1) == 'B') || (*(end + 1) == 'Q')) &&
29 (*(end + 2) == '?')) {
30 /* skip on to the end of the cluster, the next ?= */
31 end = strstr(end + 3, "?=");
34 /* sort of half valid encoding, try to find an end. */
35 end = strstr(bptr, "?=");
// FindNextEnd(bptr, end): find the "?=" that terminates the RFC2047 encoded-word
// beginning at bptr.
//
//   bptr  pointer to the "=?" that opens the encoded-word; must not be NULL.
//         The first two bytes are skipped before searching.
//   end   assignable char* lvalue. On completion it points at the closing "?=",
//         or is NULL when no '?' follows the opener or no "?=" can be found.
//
// If the '?' found after the opener is followed by an upper-case 'B' or 'Q' and
// another '?', the terminator is searched for after that "?B?" / "?Q?" marker so
// that a payload beginning with '=' is not mistaken for the terminator.  Anything
// else is treated as a half-valid encoding and the first "?=" at or after bptr is
// used.
//
// Both arguments are fully parenthesized so that an expression argument such as
// `cond ? a : b` keeps its meaning inside `(bptr) + 2`.  Each argument is
// evaluated more than once, so do not pass expressions with side effects.
// The do/while(0) wrapper makes `FindNextEnd(a, b);` a single statement.
#define FindNextEnd(bptr, end) do { \
	(end) = strchr((bptr) + 2, '?'); \
	if ((end) != NULL) { \
		if ((((end)[1] == 'B') || ((end)[1] == 'Q')) && ((end)[2] == '?')) { \
			(end) = strstr((end) + 3, "?="); \
		} else { \
			(end) = strstr((bptr), "?="); \
		} \
	} \
} while (0)
48 // Handle subjects with RFC2047 encoding such as:
49 // =?koi8-r?B?78bP0s3Mxc7JxSDXz9rE1dvO2c3JINvB0sHNySDP?=
50 void utf8ify_rfc822_string(char *buf) {
51 char *start, *end, *next, *nextend, *ptr;
56 iconv_t ic = (iconv_t)(-1) ;
57 char *ibuf; // Buffer of characters to be converted
58 char *obuf; // Buffer for converted characters
59 size_t ibuflen; // Length of input buffer
60 size_t obuflen; // Length of output buffer
61 char *isav; // Saved pointer to input buffer
62 char *osav; // Saved pointer to output buffer
65 int illegal_non_rfc2047_encoding = 0;
67 // Sometimes, badly formed messages contain strings which were simply
68 // written out directly in some foreign character set instead of
69 // using RFC2047 encoding. This is illegal but we will attempt to
70 // handle it anyway by converting from a user-specified default
71 // charset to UTF-8 if we see any nonprintable characters.
73 for (i=0; i<len; ++i) {
74 if ((buf[i] < 32) || (buf[i] > 126)) {
75 illegal_non_rfc2047_encoding = 1;
76 i = len; // take a shortcut, it won't be more than one.
79 if (illegal_non_rfc2047_encoding) {
80 const char *default_header_charset = "iso-8859-1";
81 if ( (strcasecmp(default_header_charset, "UTF-8")) && (strcasecmp(default_header_charset, "us-ascii")) ) {
82 ctdl_iconv_open("UTF-8", default_header_charset, &ic);
83 if (ic != (iconv_t)(-1) ) {
86 safestrncpy(ibuf, buf, 1024);
87 ibuflen = strlen(ibuf);
89 obuf = (char *) malloc(obuflen);
91 iconv(ic, &ibuf, &ibuflen, &obuf, &obuflen);
92 osav[1024-obuflen] = 0;
101 // pre evaluate the first pair
102 nextend = end = NULL;
104 start = strstr(buf, "=?");
106 FindNextEnd (start, end);
108 while ((start != NULL) && (end != NULL)) {
109 next = strstr(end, "=?");
111 FindNextEnd(next, nextend);
115 // did we find two partitions
116 if ((next != NULL) && ((next - end) > 2)) {
118 while ((ptr < next) &&
124 // did we find a gap just filled with blanks?
126 memmove(end + 2, next, len - (next - start));
128 // now terminate the gap at the end
129 delta = (next - end) - 2;
133 // move next to its new location.
138 // our next-pair is our new first pair now.
143 // Now we handle foreign character sets properly encoded in RFC2047 format.
144 start = strstr(buf, "=?");
145 FindNextEnd((start != NULL)? start : buf, end);
146 while (start != NULL && end != NULL && end > start) {
147 extract_token(charset, start, 1, '?', sizeof charset);
148 extract_token(encoding, start, 2, '?', sizeof encoding);
149 extract_token(istr, start, 3, '?', sizeof istr);
153 if (!strcasecmp(encoding, "B")) { // base64
154 ibuflen = CtdlDecodeBase64(ibuf, istr, strlen(istr));
156 else if (!strcasecmp(encoding, "Q")) { // quoted-printable
163 if (istr[pos] == '_') istr[pos] = ' ';
166 ibuflen = CtdlDecodeQuotedPrintable(ibuf, istr, len);
169 strcpy(ibuf, istr); // unknown encoding
170 ibuflen = strlen(istr);
173 ctdl_iconv_open("UTF-8", charset, &ic);
174 if (ic != (iconv_t)(-1) ) {
176 obuf = (char *) malloc(obuflen);
178 iconv(ic, &ibuf, &ibuflen, &obuf, &obuflen);
179 osav[1024-obuflen] = 0;
184 remove_token(end, 0, '?');
185 remove_token(end, 0, '?');
186 remove_token(end, 0, '?');
187 remove_token(end, 0, '?');
188 strcpy(end, &end[1]);
190 snprintf(newbuf, sizeof newbuf, "%s%s%s", buf, osav, end);
199 remove_token(end, 0, '?');
200 remove_token(end, 0, '?');
201 remove_token(end, 0, '?');
202 remove_token(end, 0, '?');
203 strcpy(end, &end[1]);
205 snprintf(newbuf, sizeof newbuf, "%s(unreadable)%s", buf, end);
211 // Since spammers will go to all sorts of absurd lengths to get their
212 // messages through, there are LOTS of corrupt headers out there.
213 // So, prevent a really badly formed RFC2047 header from throwing
214 // this function into an infinite loop.
216 if (passes > 20) return;
218 start = strstr(buf, "=?");
219 FindNextEnd((start != NULL)? start : buf, end);