1 // Copyright (c) 1996-2022 by the citadel.org team
3 // This program is open source software. Use, duplication, or disclosure
4 // is subject to the terms of the GNU General Public License v3.
11 #include <sys/types.h>
19 #include "libcitadel.h"
22 #define FindNextEnd(bptr, end) { \
23 end = strchr(bptr + 2, '?'); \
25 if (((*(end + 1) == 'B') || (*(end + 1) == 'Q')) && (*(end + 2) == '?')) { \
26 end = strstr(end + 3, "?="); \
27 } else end = strstr(bptr, "?="); \
32 // Handle subjects with RFC2047 encoding such as:
33 // =?koi8-r?B?78bP0s3Mxc7JxSDXz9rE1dvO2c3JINvB0sHNySDP?=
34 void utf8ify_rfc822_string(char *buf) {
35 char *start, *end, *next, *nextend, *ptr;
40 iconv_t ic = (iconv_t)(-1) ;
41 char *ibuf; // Buffer of characters to be converted
42 char *obuf; // Buffer for converted characters
43 size_t ibuflen; // Length of input buffer
44 size_t obuflen; // Length of output buffer
45 char *isav; // Saved pointer to input buffer
46 char *osav; // Saved pointer to output buffer
49 int illegal_non_rfc2047_encoding = 0;
51 // Sometimes, badly formed messages contain strings which were simply
52 // written out directly in some foreign character set instead of
53 // using RFC2047 encoding. This is illegal but we will attempt to
54 // handle it anyway by converting from a user-specified default
55 // charset to UTF-8 if we see any nonprintable characters.
57 for (i=0; i<len; ++i) {
58 if ((buf[i] < 32) || (buf[i] > 126)) {
59 illegal_non_rfc2047_encoding = 1;
60 i = len; // take a shortcut, it won't be more than one.
63 if (illegal_non_rfc2047_encoding) {
64 const char *default_header_charset = "iso-8859-1";
65 if ( (strcasecmp(default_header_charset, "UTF-8")) && (strcasecmp(default_header_charset, "us-ascii")) ) {
66 ctdl_iconv_open("UTF-8", default_header_charset, &ic);
67 if (ic != (iconv_t)(-1) ) {
70 safestrncpy(ibuf, buf, 1024);
71 ibuflen = strlen(ibuf);
73 obuf = (char *) malloc(obuflen);
75 iconv(ic, &ibuf, &ibuflen, &obuf, &obuflen);
76 osav[1024-obuflen] = 0;
85 // pre-evaluate the first pair
88 start = strstr(buf, "=?");
90 FindNextEnd (start, end);
92 while ((start != NULL) && (end != NULL)) {
93 next = strstr(end, "=?");
95 FindNextEnd(next, nextend);
99 // did we find two partitions
100 if ((next != NULL) && ((next - end) > 2)) {
102 while ((ptr < next) &&
108 // did we find a gap just filled with blanks?
110 memmove(end + 2, next, len - (next - start));
112 // now terminate the gap at the end
113 delta = (next - end) - 2;
117 // move next to its new location.
122 // our next-pair is our new first pair now.
127 // Now we handle foreign character sets properly encoded in RFC2047 format.
128 start = strstr(buf, "=?");
129 FindNextEnd((start != NULL)? start : buf, end);
130 while (start != NULL && end != NULL && end > start) {
131 extract_token(charset, start, 1, '?', sizeof charset);
132 extract_token(encoding, start, 2, '?', sizeof encoding);
133 extract_token(istr, start, 3, '?', sizeof istr);
137 if (!strcasecmp(encoding, "B")) { // base64
138 ibuflen = CtdlDecodeBase64(ibuf, istr, strlen(istr));
140 else if (!strcasecmp(encoding, "Q")) { // quoted-printable
147 if (istr[pos] == '_') istr[pos] = ' ';
150 ibuflen = CtdlDecodeQuotedPrintable(ibuf, istr, len);
153 strcpy(ibuf, istr); // unknown encoding
154 ibuflen = strlen(istr);
157 ctdl_iconv_open("UTF-8", charset, &ic);
158 if (ic != (iconv_t)(-1) ) {
160 obuf = (char *) malloc(obuflen);
162 iconv(ic, &ibuf, &ibuflen, &obuf, &obuflen);
163 osav[1024-obuflen] = 0;
168 remove_token(end, 0, '?');
169 remove_token(end, 0, '?');
170 remove_token(end, 0, '?');
171 remove_token(end, 0, '?');
172 strcpy(end, &end[1]);
174 snprintf(newbuf, sizeof newbuf, "%s%s%s", buf, osav, end);
183 remove_token(end, 0, '?');
184 remove_token(end, 0, '?');
185 remove_token(end, 0, '?');
186 remove_token(end, 0, '?');
187 strcpy(end, &end[1]);
189 snprintf(newbuf, sizeof newbuf, "%s(unreadable)%s", buf, end);
195 // Since spammers will go to all sorts of absurd lengths to get their
196 // messages through, there are LOTS of corrupt headers out there.
197 // So, prevent a really badly formed RFC2047 header from throwing
198 // this function into an infinite loop.
200 if (passes > 20) return;
202 start = strstr(buf, "=?");
203 FindNextEnd((start != NULL)? start : buf, end);