2 * Copyright (c) 1996-2012 by the citadel.org team
4 * This program is open source software. You can redistribute it and/or
5 * modify it under the terms of the GNU General Public License, version 3.
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
/*
 * Wrapper around iconv_open()
 * Our version adds aliases for non-standard Microsoft charsets
 * such as 'MS950', aliasing them to names like 'CP950'
 *
 * The name is first tried verbatim.  Only when that fails, and the source
 * charset starts with "MS" (any letter case), is the open retried with the
 * first two characters replaced by "CP".  The alias is applied to names of
 * any length that fit the local buffer (e.g. 'MS1252' -> 'CP1252'), not
 * only to five-character names.
 *
 * tocode	Target encoding
 * fromcode	Source encoding
 *
 * Returns the conversion descriptor, or (iconv_t)(-1) if no converter could
 * be opened or if either argument is NULL.  A successfully opened
 * descriptor must be released by the caller with iconv_close().
 */
iconv_t ctdl_iconv_open(const char *tocode, const char *fromcode)
{
	iconv_t ic;
	char alias_fromcode[64];
	size_t fromlen;

	/* iconv_open() and strlen() must never see a NULL name. */
	if ((tocode == NULL) || (fromcode == NULL)) {
		return (iconv_t)(-1);
	}

	ic = iconv_open(tocode, fromcode);
	if (ic != (iconv_t)(-1)) {
		return ic;
	}

	fromlen = strlen(fromcode);
	if ((fromlen > 2) &&
	    (fromlen < sizeof alias_fromcode) &&
	    (!strncasecmp(fromcode, "MS", 2))) {
		alias_fromcode[0] = 'C';
		alias_fromcode[1] = 'P';
		/* Copy the remainder of the name including its terminator. */
		memcpy(&alias_fromcode[2], &fromcode[2], fromlen - 1);
		ic = iconv_open(tocode, alias_fromcode);
	}

	return ic;
}
/*
 * Locate the "?=" that terminates the RFC2047 encoded-word starting at bptr.
 *
 * bptr is expected to point at the leading "=?" of a candidate encoded-word.
 * The first '?' after that marker closes the charset field.  If it is
 * followed by a one-letter encoding ('B' or 'Q', in either letter case, as
 * RFC2047 encodings are case-insensitive) and another '?', the search for
 * the terminator starts after the encoding field, so that a payload
 * beginning with '=' is not mistaken for the terminator.  Otherwise the
 * word is only half valid and the first "?=" found from bptr on is used.
 *
 * Returns a pointer to the '?' of the terminating "?=", or NULL if bptr is
 * NULL, is shorter than the two-character "=?" marker, or holds no
 * terminator.
 */
static inline char *FindNextEnd (char *bptr)
{
	char *end;
	char enc;

	/* Guard bptr + 2 below: never step past the string terminator. */
	if ((bptr == NULL) || (bptr[0] == '\0') || (bptr[1] == '\0')) {
		return NULL;
	}

	/* Find the '?' that closes the charset field */
	end = strchr(bptr + 2, '?');
	if (end == NULL) {
		return NULL;
	}

	enc = end[1];
	if (((enc == 'B') || (enc == 'b') || (enc == 'Q') || (enc == 'q')) &&
	    (end[2] == '?')) {
		/* skip on to the end of the cluster, the next ?= */
		end = strstr(end + 3, "?=");
	}
	else {
		/* sort of half valid encoding, try to find an end. */
		end = strstr(bptr, "?=");
	}

	return end;
}
/*
 * utf8ify_rfc822_string() rewrites the header string *buf in place so that
 * RFC2047 encoded-words are replaced by their UTF-8 text.
 *
 * Stages visible in this listing:
 *  1. Scan *buf for bytes outside the range 32..126.  If one is found, read
 *     the "default_header_charset" preference and, unless it is UTF-8 or
 *     us-ascii, push the buffer through iconv towards UTF-8.
 *  2. Walk consecutive "=?" ... "?=" pairs (via FindNextEnd) and close up a
 *     gap between two of them when the gap holds only blanks.
 *  3. For each encoded-word: split out charset, encoding and payload with
 *     extract_token(), decode base64 ("B") or quoted-printable ("Q"), or
 *     copy the payload unchanged for any other encoding, convert it to
 *     UTF-8 with iconv, and splice the result back into *buf through a
 *     local staging buffer.  When no converter can be opened the word is
 *     replaced by the literal text "(unreadable)".
 *  4. Give up after more than 20 passes.
 *
 * NOTE(review): this listing omits many lines of the function (buffer
 * sizes, allocations and frees, loop bookkeeping), so the notes below only
 * cover what is visible and should be confirmed against the full source.
 */
60 * Handle subjects with RFC2047 encoding such as:
61 * =?koi8-r?B?78bP0s3Mxc7JxSDXz9rE1dvO2c3JINvB0sHNySDP?=
63 void utf8ify_rfc822_string(char **buf) {
64 char *start, *end, *next, *nextend, *ptr;
69 iconv_t ic = (iconv_t)(-1) ;
70 char *ibuf; /**< Buffer of characters to be converted */
71 char *obuf; /**< Buffer for converted characters */
72 size_t ibuflen; /**< Length of input buffer */
73 size_t obuflen; /**< Length of output buffer */
74 char *isav; /**< Saved pointer to input buffer */
75 char *osav; /**< Saved pointer to output buffer */
78 int illegal_non_rfc2047_encoding = 0;
80 /* Sometimes, badly formed messages contain strings which were simply
81 * written out directly in some foreign character set instead of
82 * using RFC2047 encoding. This is illegal but we will attempt to
83 * handle it anyway by converting from a user-specified default
84 * charset to UTF-8 if we see any nonprintable characters.
87 for (i=0; i<len; ++i) {
88 if (((*buf)[i] < 32) || ((*buf)[i] > 126)) {
89 illegal_non_rfc2047_encoding = 1;
90 i = len; /*< take a shortcut, it won't be more than one. */
/* Stage 1 fallback: convert from the user's preferred header charset. */
93 if (illegal_non_rfc2047_encoding) {
94 StrBuf *default_header_charset;
95 get_preference("default_header_charset", &default_header_charset);
96 if ( (strcasecmp(ChrPtr(default_header_charset), "UTF-8")) &&
97 (strcasecmp(ChrPtr(default_header_charset), "us-ascii")) ) {
/* NOTE(review): called with three arguments here, while the definition of
 * ctdl_iconv_open() visible in this file takes two and returns the
 * descriptor - confirm which prototype is actually in effect. */
98 ctdl_iconv_open("UTF-8", ChrPtr(default_header_charset), &ic);
99 if (ic != (iconv_t)(-1) ) {
102 safestrncpy(ibuf, *buf, 1023);
103 ibuflen = strlen(ibuf);
/* NOTE(review): no check of the malloc() result is visible before use. */
105 obuf = (char *) malloc(obuflen);
107 iconv(ic, &ibuf, &ibuflen, &obuf, &obuflen);
/* NOTE(review): the terminator offset is 1023 - obuflen here but
 * 1024 - obuflen in the encoded-word loop below; the initial obuflen is
 * not visible in this listing - confirm which offset is intended. */
108 osav[1023-obuflen] = 0;
117 /* pre evaluate the first pair */
118 nextend = end = NULL;
120 start = strstr(*buf, "=?");
122 end = FindNextEnd (start);
124 while ((start != NULL) && (end != NULL))
126 next = strstr(end, "=?");
128 nextend = FindNextEnd(next);
132 /* did we find two partitions */
133 if ((next != NULL) &&
137 while ((ptr < next) &&
143 /* did we find a gap just filled with blanks? */
148 len - (next - start));
150 /* now terminate the gap at the end */
151 delta = (next - end) - 2;
155 /* move next to its new location. */
160 /* our next-pair is our new first pair now. */
165 /* Now we handle foreign character sets properly encoded
168 while (start=strstr((*buf), "=?"), end=FindNextEnd((start != NULL)? start : (*buf)),
169 ((start != NULL) && (end != NULL) && (end > start)) )
171 extract_token(charset, start, 1, '?', sizeof charset);
172 extract_token(encoding, start, 2, '?', sizeof encoding);
173 extract_token(istr, start, 3, '?', sizeof istr);
177 if (!strcasecmp(encoding, "B")) { /**< base64 */
178 ibuflen = CtdlDecodeBase64(ibuf, istr, strlen(istr));
180 else if (!strcasecmp(encoding, "Q")) { /**< quoted-printable */
/* Underscores are turned into spaces before quoted-printable decoding. */
188 if (istr[pos] == '_') istr[pos] = ' ';
192 ibuflen = CtdlDecodeQuotedPrintable(ibuf, istr, len);
195 strcpy(ibuf, istr); /**< unknown encoding */
196 ibuflen = strlen(istr);
/* NOTE(review): same three-argument call as in the fallback path above. */
199 ctdl_iconv_open("UTF-8", charset, &ic);
200 if (ic != (iconv_t)(-1) ) {
202 obuf = (char *) malloc(obuflen);
204 iconv(ic, &ibuf, &ibuflen, &obuf, &obuflen);
205 osav[1024-obuflen] = 0;
/* Four '?'-delimited tokens are removed at end, presumably the pieces of
 * the encoded-word that was just decoded - confirm against remove_token(). */
210 remove_token(end, 0, '?');
211 remove_token(end, 0, '?');
212 remove_token(end, 0, '?');
213 remove_token(end, 0, '?');
/* NOTE(review): source and destination overlap; strcpy() is undefined for
 * overlapping objects, memmove() would be the defined way to shift left. */
214 strcpy(end, &end[1]);
/* NOTE(review): newbuf is copied back with strcpy(); the capacity of *buf
 * is not visible here - confirm it can hold the rebuilt string. */
216 snprintf(newbuf, sizeof newbuf, "%s%s%s", *buf, osav, end);
217 strcpy(*buf, newbuf);
/* Converter could not be opened: same token removal, then a placeholder. */
226 remove_token(end, 0, '?');
227 remove_token(end, 0, '?');
228 remove_token(end, 0, '?');
229 remove_token(end, 0, '?');
230 strcpy(end, &end[1]);
232 snprintf(newbuf, sizeof newbuf, "%s(unreadable)%s", *buf, end);
233 strcpy(*buf, newbuf);
239 * Since spammers will go to all sorts of absurd lengths to get their
240 * messages through, there are LOTS of corrupt headers out there.
241 * So, prevent a really badly formed RFC2047 header from throwing
242 * this function into an infinite loop.
/* Hard cap on the number of passes over the buffer. */
245 if (passes > 20) return;
/*
 * No-op variant of utf8ify_rfc822_string(): the string is left untouched.
 * Presumably this is the build without iconv support - the preprocessor
 * guard selecting it is not visible here.
 *
 * Defined as an ordinary external function rather than plain 'inline':
 * under C99/C11 an inline definition without 'extern' or 'static' provides
 * no external definition, so callers could fail to link.
 *
 * a	Address of the header string (ignored)
 */
void utf8ify_rfc822_string(char **a)
{
	(void)a;	/* intentionally unused */
}
258 * \brief RFC2047-encode a header field if necessary.
259 * If no non-ASCII characters are found, the string
260 * will be copied verbatim without encoding.
262 * \param target Target buffer.
263 * \param maxlen Maximum size of target buffer.
264 * \param source Source string to be encoded.
265 * \param SourceLen Length of the source string
266 * \returns encoded length; -1 on failure.
268 int webcit_rfc2047encode(char *target, int maxlen, char *source, long SourceLen)
270 const char headerStr[] = "=?UTF-8?Q?";
271 int need_to_encode = 0;
276 if ((source == NULL) ||
278 (SourceLen > maxlen)) return -1;
280 while ((!IsEmptyStr (&source[i])) &&
281 (need_to_encode == 0) &&
283 if (((unsigned char) source[i] < 32) ||
284 ((unsigned char) source[i] > 126)) {
290 if (!need_to_encode) {
291 memcpy (target, source, SourceLen);
292 target[SourceLen] = '\0';
296 if (sizeof (headerStr + SourceLen + 2) > maxlen)
298 memcpy (target, headerStr, sizeof (headerStr));
299 len = sizeof (headerStr) - 1;
300 for (i=0; (i < SourceLen) && (len + 3< maxlen) ; ++i) {
301 ch = (unsigned char) source[i];
302 if ((ch < 32) || (ch > 126) || (ch == 61)) {
303 sprintf(&target[len], "=%02X", ch);
307 sprintf(&target[len], "%c", ch);
312 if (len + 2 < maxlen) {
313 strcat(&target[len], "?=");