moved whitespace around
[citadel.git] / libcitadel / lib / decode.c
1 // Copyright (c) 1996-2022 by the citadel.org team
2 //
3 // This program is open source software.  Use, duplication, or disclosure
4 // is subject to the terms of the GNU General Public License v3.
5
6
7 #include <stdlib.h>
8 #include <unistd.h>
9 #include <stdio.h>
10 #include <signal.h>
11 #include <sys/types.h>
12 #include <ctype.h>
13 #include <string.h>
14 #include <sys/stat.h>
15 #include <errno.h>
16 #include <limits.h>
17 #include <iconv.h>
18 #include <time.h>
19 #include "libcitadel.h"
20
21
// Find the "?=" that closes the RFC2047 encoded-word beginning at bptr and
// store a pointer to it in end (NULL if there is none).
//
// bptr must point at the leading "=?" of the word.  The search first skips
// the charset; if that is followed by "?B?" or "?Q?" the terminator is
// searched for after the encoding marker, so a payload that itself begins
// with '=' (common in Q encoding, e.g. "?Q?=41") is not mistaken for the
// terminator.  Otherwise the first "?=" at or after bptr is used.
//
// Both arguments are parenthesised so arbitrary expressions (including a
// ternary) may be passed; note that bptr is evaluated more than once, so it
// must not have side effects.  The do/while(0) wrapper makes the macro behave
// as a single statement, e.g. inside an unbraced if/else.
#define FindNextEnd(bptr, end) do { \
        (end) = strchr((bptr) + 2, '?'); \
        if ((end) != NULL) { \
                if ((((end)[1] == 'B') || ((end)[1] == 'Q')) && ((end)[2] == '?')) { \
                        (end) = strstr((end) + 3, "?="); \
                } else { \
                        (end) = strstr((bptr), "?="); \
                } \
        } \
} while (0)
30
31
// Convert an RFC822 header string to UTF-8, in place.  Handles subjects with
// RFC2047 encoding such as:
// =?koi8-r?B?78bP0s3Mxc7JxSDXz9rE1dvO2c3JINvB0sHNySDP?=
//
// buf  NUL-terminated header text; it is overwritten with the converted text.
//      NOTE(review): the converted text can be longer than the input (it is
//      limited only by the 1024-byte work buffers used here), and this
//      function has no way to know the capacity of buf -- confirm that every
//      caller passes a sufficiently large buffer.
//
// The work is done in three passes:
//   1. If the string contains raw bytes outside printable ASCII, it is assumed
//      to be written in a default charset (iso-8859-1) and converted to UTF-8.
//   2. Whitespace-only gaps between two adjacent encoded-words are removed.
//   3. Each encoded-word is decoded (base64 or quoted-printable), converted
//      from its charset to UTF-8 and spliced back into buf.  Words whose
//      charset cannot be converted are replaced with "(unreadable)".
void utf8ify_rfc822_string(char *buf) {
        enum { WORK_SIZE = 1024 };      // capacity of the conversion buffers
        char *start, *end, *next, *nextend, *ptr;
        char newbuf[1024];
        char charset[128];
        char encoding[16];
        char istr[1024];
        iconv_t ic = (iconv_t)(-1);
        char *ibuf;                     // Buffer of characters to be converted
        char *obuf;                     // Buffer for converted characters
        size_t ibuflen;                 // Length of input buffer
        size_t obuflen;                 // Length of output buffer
        char *isav;                     // Saved pointer to input buffer
        char *osav;                     // Saved pointer to output buffer
        int passes = 0;
        size_t i, len, gap;
        int illegal_non_rfc2047_encoding = 0;

        // Sometimes, badly formed messages contain strings which were simply
        // written out directly in some foreign character set instead of
        // using RFC2047 encoding.  This is illegal but we will attempt to
        // handle it anyway by converting from a default charset to UTF-8 if
        // we see any nonprintable characters.
        len = strlen(buf);
        for (i = 0; i < len; ++i) {
                unsigned char c = (unsigned char) buf[i];
                if ((c < 32) || (c > 126)) {
                        illegal_non_rfc2047_encoding = 1;
                        break;          // one offending byte is enough
                }
        }
        if (illegal_non_rfc2047_encoding) {
                const char *default_header_charset = "iso-8859-1";
                if ( (strcasecmp(default_header_charset, "UTF-8")) && (strcasecmp(default_header_charset, "us-ascii")) ) {
                        ctdl_iconv_open("UTF-8", default_header_charset, &ic);
                        if (ic != (iconv_t)(-1)) {
                                isav = malloc(WORK_SIZE);
                                // One extra byte so the terminator always fits, even
                                // when iconv() fills the whole output buffer.
                                osav = malloc(WORK_SIZE + 1);
                                if ((isav != NULL) && (osav != NULL)) {
                                        ibuf = isav;
                                        safestrncpy(ibuf, buf, WORK_SIZE);
                                        ibuflen = strlen(ibuf);
                                        obuf = osav;
                                        obuflen = WORK_SIZE;
                                        iconv(ic, &ibuf, &ibuflen, &obuf, &obuflen);
                                        osav[WORK_SIZE - obuflen] = 0;
                                        strcpy(buf, osav);
                                }
                                free(osav);
                                free(isav);
                                iconv_close(ic);
                        }
                }
        }

        // Pass 2: pre evaluate the first pair of "=?" ... "?=" markers
        nextend = end = NULL;
        len = strlen(buf);
        start = strstr(buf, "=?");
        if (start != NULL) {
                FindNextEnd(start, end);
        }

        while ((start != NULL) && (end != NULL)) {
                next = strstr(end, "=?");
                if (next != NULL) {
                        FindNextEnd(next, nextend);
                }
                if (nextend == NULL) {
                        next = NULL;
                }

                // did we find two partitions with something between them?
                if ((next != NULL) && ((next - end) > 2)) {
                        ptr = end + 2;
                        // The cast keeps isspace() defined for bytes >= 0x80.
                        while ((ptr < next) && isspace((unsigned char) *ptr)) {
                                ptr++;
                        }
                        // did we find a gap just filled with blanks?
                        if (ptr == next) {
                                gap = (size_t)(next - end) - 2;

                                // Close the gap.  The byte count is measured from
                                // next to the end of the string (buf + len) and
                                // includes the terminating NUL.
                                memmove(end + 2, next, (size_t)((buf + len) - next) + 1);
                                len -= gap;

                                // move next to its new location.
                                next -= gap;
                                nextend -= gap;
                        }
                }
                // our next-pair is our new first pair now.
                start = next;
                end = nextend;
        }

        // Pass 3: now we handle foreign character sets properly encoded in RFC2047 format.
        start = strstr(buf, "=?");
        end = NULL;
        if (start != NULL) {
                FindNextEnd(start, end);
        }
        while ((start != NULL) && (end != NULL) && (end > start)) {
                // An encoded-word looks like =?charset?encoding?text?=
                extract_token(charset, start, 1, '?', sizeof charset);
                extract_token(encoding, start, 2, '?', sizeof encoding);
                extract_token(istr, start, 3, '?', sizeof istr);

                isav = malloc(sizeof istr);
                if (isav == NULL) {
                        return;         // out of memory; leave the rest untouched
                }
                ibuf = isav;
                if (!strcasecmp(encoding, "B")) {       // base64
                        ibuflen = CtdlDecodeBase64(ibuf, istr, strlen(istr));
                }
                else if (!strcasecmp(encoding, "Q")) {  // quoted-printable
                        size_t qlen = strlen(istr);
                        size_t pos;

                        // In the Q encoding an underscore stands for a space.
                        for (pos = 0; pos < qlen; ++pos) {
                                if (istr[pos] == '_') {
                                        istr[pos] = ' ';
                                }
                        }
                        ibuflen = CtdlDecodeQuotedPrintable(ibuf, istr, qlen);
                }
                else {
                        strcpy(ibuf, istr);             // unknown encoding
                        ibuflen = strlen(istr);
                }

                // Convert the decoded bytes to UTF-8.  osav stays NULL when the
                // charset cannot be converted (or memory runs out).
                osav = NULL;
                ic = (iconv_t)(-1);
                ctdl_iconv_open("UTF-8", charset, &ic);
                if (ic != (iconv_t)(-1)) {
                        osav = malloc(WORK_SIZE + 1);   // +1: room for the terminator
                        if (osav != NULL) {
                                obuf = osav;
                                obuflen = WORK_SIZE;
                                iconv(ic, &ibuf, &ibuflen, &obuf, &obuflen);
                                osav[WORK_SIZE - obuflen] = 0;
                        }
                        iconv_close(ic);
                }

                // Cut buf off right before the encoded-word, then strip the word's
                // four '?'-delimited pieces from the remainder.
                *start = '\0';
                end = start + 1;
                remove_token(end, 0, '?');
                remove_token(end, 0, '?');
                remove_token(end, 0, '?');
                remove_token(end, 0, '?');
                // Drop the '=' left over from the closing "?=".  The regions
                // overlap, so memmove() is required; skip it when nothing is left.
                if (*end != '\0') {
                        memmove(end, end + 1, strlen(end + 1) + 1);
                }

                // Reassemble: text before the word + replacement + text after it.
                snprintf(newbuf, sizeof newbuf, "%s%s%s", buf, (osav != NULL) ? osav : "(unreadable)", end);
                strcpy(buf, newbuf);

                free(osav);
                free(isav);

                // Since spammers will go to all sorts of absurd lengths to get their
                // messages through, there are LOTS of corrupt headers out there.
                // So, prevent a really badly formed RFC2047 header from throwing
                // this function into an infinite loop.
                ++passes;
                if (passes > 20) {
                        return;
                }

                start = strstr(buf, "=?");
                end = NULL;
                if (start != NULL) {
                        FindNextEnd(start, end);
                }
        }

}