2 * Copyright (c) 1996-2010 by the citadel.org team
4 * This program is open source software. You can redistribute it and/or
5 * modify it under the terms of the GNU General Public License as
6 * published by the Free Software Foundation; either version 3 of the
7 * License, or (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
/*
 * Open an iconv conversion descriptor, tolerating Microsoft-style
 * charset names.
 *
 * The request is first handed to iconv_open() unchanged.  Only when that
 * fails, and the source name is exactly five characters long and begins
 * with "MS" (in any letter case), is a second attempt made with those two
 * leading characters replaced by "CP" -- so "MS950" is retried as "CP950".
 *
 * tocode	Target encoding
 * fromcode	Source encoding
 *
 * Returns the conversion descriptor, or (iconv_t)(-1) when no attempt
 * succeeded.
 */
iconv_t ctdl_iconv_open(const char *tocode, const char *fromcode)
{
	char cp_name[64];
	iconv_t cd;

	cd = iconv_open(tocode, fromcode);
	if (cd != (iconv_t)(-1)) {
		return cd;
	}

	/* Only five-character names of the form MSnnn get a second chance. */
	if (strlen(fromcode) != 5) {
		return cd;
	}
	if (strncasecmp(fromcode, "MS", 2) != 0) {
		return cd;
	}

	/* Five characters plus the terminator always fit into cp_name. */
	memcpy(cp_name, fromcode, 6);
	cp_name[0] = 'C';
	cp_name[1] = 'P';
	return iconv_open(tocode, cp_name);
}
/*
 * Locate the "?=" that closes the RFC2047 encoded word starting at bptr.
 *
 * bptr is expected to point at the leading "=?" of an encoded word
 * ("=?charset?encoding?text?=").  When the charset is followed by a
 * well-formed "?B?" or "?Q?" marker, the search for the closing "?="
 * starts behind that marker, so a payload beginning with '=' cannot be
 * mistaken for the terminator.  The encoding letter is matched in either
 * case, in line with the case-insensitive comparison the decoder applies
 * to it.  For anything else the first "?=" found from bptr onwards is
 * used as a best-effort end.
 *
 * Returns a pointer to the closing "?=", or NULL if there is none or if
 * bptr is NULL or shorter than the two-character "=?" prefix.
 */
static inline char *FindNextEnd (char *bptr)
{
	char *end;
	char enc;

	/* bptr + 2 is only a valid position if two characters really exist. */
	if ((bptr == NULL) || (bptr[0] == '\0') || (bptr[1] == '\0')) {
		return NULL;
	}

	/* Find the '?' that separates the charset from the encoding. */
	end = strchr(bptr + 2, '?');
	if (end == NULL) return NULL;

	enc = *(end + 1);
	if (((enc == 'B') || (enc == 'b') || (enc == 'Q') || (enc == 'q')) &&
	    (*(end + 2) == '?')) {
		/* skip on to the end of the cluster, the next ?= */
		end = strstr(end + 3, "?=");
	}
	else {
		/* sort of half valid encoding, try to find an end. */
		end = strstr(bptr, "?=");
	}
	return end;
}
/*
 * Rewrite the header string referenced by buf as UTF-8, in place.
 * Three stages are visible in this function:
 *   1. If the string holds any byte outside the range 32..126, it is
 *      converted as a whole from the charset named by the
 *      default_header_charset preference.
 *   2. Neighbouring encoded words are examined and the gap between them
 *      is handled (see the inline comments of that loop).
 *   3. Each remaining encoded word is decoded (base64 or
 *      quoted-printable), converted to UTF-8 and spliced back into the
 *      string; the function returns once more than 20 passes were made.
 */
66 * Handle subjects with RFC2047 encoding such as:
67 * =?koi8-r?B?78bP0s3Mxc7JxSDXz9rE1dvO2c3JINvB0sHNySDP?=
69 void utf8ify_rfc822_string(char **buf) {
70 char *start, *end, *next, *nextend, *ptr;
75 iconv_t ic = (iconv_t)(-1) ;
76 char *ibuf; /**< Buffer of characters to be converted */
77 char *obuf; /**< Buffer for converted characters */
78 size_t ibuflen; /**< Length of input buffer */
79 size_t obuflen; /**< Length of output buffer */
80 char *isav; /**< Saved pointer to input buffer */
81 char *osav; /**< Saved pointer to output buffer */
/* Set as soon as one byte outside 32..126 is seen in the input. */
84 int illegal_non_rfc2047_encoding = 0;
86 /* Sometimes, badly formed messages contain strings which were simply
87 * written out directly in some foreign character set instead of
88 * using RFC2047 encoding. This is illegal but we will attempt to
89 * handle it anyway by converting from a user-specified default
90 * charset to UTF-8 if we see any nonprintable characters.
93 for (i=0; i<len; ++i) {
94 if (((*buf)[i] < 32) || ((*buf)[i] > 126)) {
95 illegal_non_rfc2047_encoding = 1;
96 i = len; /*< take a shortcut, it won't be more than one. */
/* Raw non-ASCII input: convert the whole string, unless the preferred
 * charset is already UTF-8 or us-ascii (both comparisons must be
 * non-zero for the conversion to run). */
99 if (illegal_non_rfc2047_encoding) {
100 StrBuf *default_header_charset;
101 get_preference("default_header_charset", &default_header_charset);
102 if ( (strcasecmp(ChrPtr(default_header_charset), "UTF-8")) &&
103 (strcasecmp(ChrPtr(default_header_charset), "us-ascii")) ) {
/* NOTE(review): ctdl_iconv_open() as defined earlier in this file takes
 * two arguments and returns the descriptor; this call passes a third
 * argument and ignores the return value. Confirm which prototype is
 * actually in effect, otherwise ic is never assigned here. */
104 ctdl_iconv_open("UTF-8", ChrPtr(default_header_charset), &ic);
105 if (ic != (iconv_t)(-1) ) {
/* At most 1023 bytes of the input are handed to the converter; the
 * allocation of ibuf is not visible in this view. */
108 safestrncpy(ibuf, *buf, 1023);
109 ibuflen = strlen(ibuf);
111 obuf = (char *) malloc(obuflen);
/* iconv() advances ibuf and obuf and decrements both length counters,
 * which is why the saved pointers isav and osav exist. */
113 iconv(ic, &ibuf, &ibuflen, &obuf, &obuflen);
/* Terminates the converted text; assumes obuflen started out as 1023
 * (the assignment is not visible here) -- TODO confirm. */
114 osav[1023-obuflen] = 0;
123 /* pre evaluate the first pair */
124 nextend = end = NULL;
126 start = strstr(*buf, "=?");
128 end = FindNextEnd (start);
/* Walk pairs of encoded words: start and end delimit the current word,
 * next and nextend the following one. */
130 while ((start != NULL) && (end != NULL))
132 next = strstr(end, "=?");
134 nextend = FindNextEnd(next);
138 /* did we find two partitions */
139 if ((next != NULL) &&
143 while ((ptr < next) &&
149 /* did we find a gap just filled with blanks? */
154 len - (next - start));
156 /* now terminate the gap at the end */
/* Distance between the two words minus the two characters of the
 * closing marker that end points at. */
157 delta = (next - end) - 2;
161 /* move next to its new location. */
166 /* our next-pair is our new first pair now. */
/* Decoding loop: every iteration searches *buf afresh for the first
 * encoded word (the comma expression below re-evaluates start and end
 * each time) and stops when none is left or end does not lie behind
 * start. Fields 1, 2 and 3 of the word, split on the question mark, are
 * fetched as charset, encoding and encoded text (presumably
 * extract_token counts fields from zero -- verify against its
 * definition). */
171 /* Now we handle foreign character sets properly encoded
174 while (start=strstr((*buf), "=?"), end=FindNextEnd((start != NULL)? start : (*buf)),
175 ((start != NULL) && (end != NULL) && (end > start)) )
177 extract_token(charset, start, 1, '?', sizeof charset);
178 extract_token(encoding, start, 2, '?', sizeof encoding);
179 extract_token(istr, start, 3, '?', sizeof istr);
183 if (!strcasecmp(encoding, "B")) { /**< base64 */
184 ibuflen = CtdlDecodeBase64(ibuf, istr, strlen(istr));
186 else if (!strcasecmp(encoding, "Q")) { /**< quoted-printable */
/* In the Q encoding an underscore stands for a space, so underscores
 * are replaced before the quoted-printable decoder runs. */
194 if (istr[pos] == '_') istr[pos] = ' ';
198 ibuflen = CtdlDecodeQuotedPrintable(ibuf, istr, len);
/* Any other encoding name: the text is passed on undecoded. */
201 strcpy(ibuf, istr); /**< unknown encoding */
202 ibuflen = strlen(istr);
/* NOTE(review): same three-argument call as in the raw-charset branch
 * above; it does not match the two-argument definition of
 * ctdl_iconv_open() in this file. */
205 ctdl_iconv_open("UTF-8", charset, &ic);
206 if (ic != (iconv_t)(-1) ) {
208 obuf = (char *) malloc(obuflen);
210 iconv(ic, &ibuf, &ibuflen, &obuf, &obuflen);
/* NOTE(review): the terminator index uses 1024 here but 1023 in the
 * raw-charset branch; the initial value of obuflen is not visible, so
 * confirm that this index cannot land one past the allocation. */
211 osav[1024-obuflen] = 0;
/* Cut the encoded word out of the tail that end points into (four
 * remove_token calls, presumably one per field), then drop one more
 * character.
 * NOTE(review): strcpy with overlapping source and destination is
 * undefined behaviour; memmove is the defined way to shift a string
 * left by one. */
216 remove_token(end, 0, '?');
217 remove_token(end, 0, '?');
218 remove_token(end, 0, '?');
219 remove_token(end, 0, '?');
220 strcpy(end, &end[1]);
/* Reassemble prefix, converted text and tail, then copy the result back
 * over the original string.
 * NOTE(review): the copy back into *buf is unbounded; whether *buf can
 * hold a result longer than the original depends on the caller. */
222 snprintf(newbuf, sizeof newbuf, "%s%s%s", *buf, osav, end);
223 strcpy(*buf, newbuf);
/* Second removal sequence: here the encoded word is replaced by the
 * fixed marker text instead of converted output (presumably the branch
 * taken when no converter could be opened -- the enclosing else is not
 * visible). The overlapping strcpy note above applies here as well. */
232 remove_token(end, 0, '?');
233 remove_token(end, 0, '?');
234 remove_token(end, 0, '?');
235 remove_token(end, 0, '?');
236 strcpy(end, &end[1]);
238 snprintf(newbuf, sizeof newbuf, "%s(unreadable)%s", *buf, end);
239 strcpy(*buf, newbuf);
245 * Since spammers will go to all sorts of absurd lengths to get their
246 * messages through, there are LOTS of corrupt headers out there.
247 * So, prevent a really badly formed RFC2047 header from throwing
248 * this function into an infinite loop.
/* Hard stop: give up once more than 20 decoding passes were made. */
251 if (passes > 20) return;
/*
 * No-op variant of utf8ify_rfc822_string(): the string referenced by a
 * is left exactly as it is.
 * NOTE(review): presumably the fallback for builds without iconv; the
 * surrounding preprocessor conditional is not visible here.
 *
 * Defined as an ordinary external function: a bare "inline" at file
 * scope only provides an inline definition under C99/C11 rules, which
 * leaves callers without a symbol to link against.
 *
 * a	Pointer to the header string; not read and not modified.
 */
void utf8ify_rfc822_string(char **a)
{
	(void)a;	/* nothing to convert */
}
/*
 * Write source into target, either verbatim or as a single RFC2047
 * encoded word with the fixed prefix =?UTF-8?Q? when a byte outside the
 * range 32..126 is present. The end of this function is not visible in
 * this view, so only the steps shown below are described.
 */
264 * \brief RFC2047-encode a header field if necessary.
265 * If no non-ASCII characters are found, the string
266 * will be copied verbatim without encoding.
268 * \param target Target buffer.
269 * \param maxlen Maximum size of target buffer.
270 * \param source Source string to be encoded.
271 * \param SourceLen Length of the source string
272 * \returns encoded length; -1 if non success.
274 int webcit_rfc2047encode(char *target, int maxlen, char *source, long SourceLen)
/* Prefix of the encoded word; sizeof on this array counts the
 * terminating NUL as well. */
276 const char headerStr[] = "=?UTF-8?Q?";
277 int need_to_encode = 0;
/* Reject a missing source and a source longer than maxlen. A source of
 * exactly maxlen bytes passes this check. */
282 if ((source == NULL) ||
284 (SourceLen > maxlen)) return -1;
/* Scan until the end of the string or the first byte outside 32..126. */
286 while ((!IsEmptyStr (&source[i])) &&
287 (need_to_encode == 0) &&
289 if (((unsigned char) source[i] < 32) ||
290 ((unsigned char) source[i] > 126)) {
/* Plain ASCII: copy the bytes unchanged and terminate the copy.
 * NOTE(review): when SourceLen equals maxlen the terminator is written
 * to target[maxlen], one past a buffer of maxlen bytes. */
296 if (!need_to_encode) {
297 memcpy (target, source, SourceLen);
298 target[SourceLen] = '\0';
/* NOTE(review): sizeof is applied to the pointer expression
 * headerStr + SourceLen + 2, so this compares the size of a pointer
 * with maxlen. The intended check looks like
 * sizeof (headerStr) + SourceLen + 2 > maxlen -- confirm and fix. */
302 if (sizeof (headerStr + SourceLen + 2) > maxlen)
/* Copy the prefix including its NUL; len then indexes that NUL, so the
 * first encoded byte overwrites it. */
304 memcpy (target, headerStr, sizeof (headerStr));
305 len = sizeof (headerStr) - 1;
/* Encode byte by byte while at least three more bytes fit (the length
 * of one =XX escape). Input that does not fit is silently dropped. */
306 for (i=0; (i < SourceLen) && (len + 3< maxlen) ; ++i) {
307 ch = (unsigned char) source[i];
/* Bytes outside 32..126 and the equals sign (ASCII 61) are written as
 * an =XX hex escape; every other byte is written as is. */
308 if ((ch < 32) || (ch > 126) || (ch == 61)) {
309 sprintf(&target[len], "=%02X", ch);
313 sprintf(&target[len], "%c", ch);
/* Append the closing marker of the encoded word if two more bytes fit. */
318 if (len + 2 < maxlen) {
319 strcat(&target[len], "?=");