* serv_crypto.c: made changes to OpenSSL calls ... removed unnecessary
[citadel.git] / webcit / mime_parser.c
1 /*
2  * $Id$
3  *
4  * This is the MIME parser for Citadel.  Sometimes it actually works.
5  *
6  * Copyright (c) 1998-2003 by Art Cancro
7  * This code is distributed under the terms of the GNU General Public License.
8  *
9  */
10
11
12 #include <stdlib.h>
13 #include <unistd.h>
14 #include <stdio.h>
15 #include <signal.h>
16 #include <sys/types.h>
17 #include <ctype.h>
18 #include <string.h>
19 #include <sys/stat.h>
20 #include <errno.h>
21
22 #include "webcit.h"
23 #include "mime_parser.h"
24
25
/*
 * Extract the value of a "key=value" parameter from a header line.
 *
 * target  receives the value as a NUL-terminated string, or "" when the
 *         key is not found.  Callers should size it like source.
 * source  the header text to search; it is not modified (it may even be
 *         the same buffer as target).
 * key     parameter name; matched case-insensitively and it must be
 *         followed immediately by '='.
 *
 * If the value starts with a double quote, that quote is dropped.  The
 * value then ends at the next double quote, or at the end of source when
 * there is none (so an unquoted value keeps any trailing parameters).
 *
 * NOTE(review): the key is matched at any offset, so "name" also matches
 * the tail of "filename=".  Left as-is because callers may rely on it.
 */
void extract_key(char *target, char *source, char *key)
{
        size_t key_len = strlen(key);
        size_t value_len;
        char *scan;
        char *value;
        char *closing_quote;

        for (scan = source; *scan != '\0'; ++scan) {
                if ((strncasecmp(scan, key, key_len) != 0)
                    || (scan[key_len] != '=')) {
                        continue;
                }

                value = scan + key_len + 1;
                if (*value == '"') {
                        ++value;
                }

                closing_quote = strchr(value, '"');
                if (closing_quote != NULL) {
                        value_len = (size_t) (closing_quote - value);
                } else {
                        value_len = strlen(value);
                }

                /* memmove, not strcpy: target and source may overlap */
                memmove(target, value, value_len);
                target[value_len] = '\0';
                return;
        }

        target[0] = '\0';
}
45
46
/*
 * Callbacks always expect a part number.  Non-multipart messages have
 * none, so hand back the constant "1" whenever the supplied part number
 * is NULL or empty; otherwise return the supplied pointer unchanged.
 */
char *fixed_partnum(char *supplied_partnum) {
        if ((supplied_partnum != NULL) && (supplied_partnum[0] != '\0')) {
                return supplied_partnum;
        }
        return "1";
}
56
57
58
59 /*
60  * Convert "quoted-printable" to binary.  Returns number of bytes decoded.
61  */
62 int CtdlDecodeQuotedPrintable(char *decoded, char *encoded, int sourcelen) {
63         char buf[SIZ];
64         int buf_length = 0;
65         int soft_line_break = 0;
66         int ch;
67         int decoded_length = 0;
68         int i;
69
70         decoded[0] = 0;
71         decoded_length = 0;
72         buf[0] = 0;
73         buf_length = 0;
74
75         for (i = 0; i < sourcelen; ++i) {
76
77                 buf[buf_length++] = encoded[i];
78
79                 if ( (encoded[i] == '\n')
80                    || (encoded[i] == 0)
81                    || (i == (sourcelen-1)) ) {
82                         buf[buf_length++] = 0;
83
84                         /*** begin -- process one line ***/
85
86                         if (buf[strlen(buf)-1] == '\n') {
87                                 buf[strlen(buf)-1] = 0;
88                         }
89                         if (buf[strlen(buf)-1] == '\r') {
90                                 buf[strlen(buf)-1] = 0;
91                         }
92                         while (isspace(buf[strlen(buf)-1])) {
93                                 buf[strlen(buf)-1] = 0;
94                         }
95                         soft_line_break = 0;
96
97                         while (strlen(buf) > 0) {
98                                 if (!strcmp(buf, "=")) {
99                                         soft_line_break = 1;
100                                         strcpy(buf, "");
101                                 } else if ((strlen(buf)>=3) && (buf[0]=='=')) {
102                                         sscanf(&buf[1], "%02x", &ch);
103                                         decoded[decoded_length++] = ch;
104                                         strcpy(buf, &buf[3]);
105                                 } else {
106                                         decoded[decoded_length++] = buf[0];
107                                         strcpy(buf, &buf[1]);
108                                 }
109                         }
110                         if (soft_line_break == 0) {
111                                 decoded[decoded_length++] = '\r';
112                                 decoded[decoded_length++] = '\n';
113                         }
114                         buf_length = 0;
115                         /*** end -- process one line ***/
116                 }
117         }
118
119         decoded[decoded_length++] = 0;
120         return(decoded_length);
121 }
122
/*
 * Given a message or message-part body and a length, handle any necessary
 * decoding and pass the request up the stack.
 *
 * partnum       part number for the callback; NULL or "" is reported as "1".
 * part_start    first byte of the (possibly encoded) body.
 * length        number of bytes in the body.
 * content_type, disposition, name, filename
 *               passed through to the callback unchanged.
 * encoding      Content-transfer-encoding value.  This buffer is MODIFIED:
 *               "7bit", "8bit" and "binary" are replaced by "".
 * CallBack      invoked with the body (may be NULL, in which case nothing
 *               is delivered).
 * PreMultiPartCallBack, PostMultiPartCallBack
 *               not used by this function.
 * userdata      opaque pointer handed to the callback.
 * dont_decode   nonzero: deliver the body as-is, whatever the encoding.
 *
 * Unencoded bodies (or any body when dont_decode is set) are delivered
 * directly from part_start.  "base64" and "quoted-printable" bodies are
 * decoded into a temporary buffer, delivered with encoding "binary", and
 * the buffer is released before returning.  Any other encoding is
 * silently ignored, as is a decode that yields zero bytes.
 */
void mime_decode(char *partnum,
                 char *part_start, size_t length,
                 char *content_type, char *encoding,
                 char *disposition,
                 char *name, char *filename,
                 void (*CallBack)
                  (char *cbname,
                   char *cbfilename,
                   char *cbpartnum,
                   char *cbdisp,
                   void *cbcontent,
                   char *cbtype,
                   size_t cblength,
                   char *cbencoding,
                   void *cbuserdata),
                 void (*PreMultiPartCallBack)
                  (char *cbname,
                   char *cbfilename,
                   char *cbpartnum,
                   char *cbdisp,
                   void *cbcontent,
                   char *cbtype,
                   size_t cblength,
                   char *cbencoding,
                   void *cbuserdata),
                 void (*PostMultiPartCallBack)
                  (char *cbname,
                   char *cbfilename,
                   char *cbpartnum,
                   char *cbdisp,
                   void *cbcontent,
                   char *cbtype,
                   size_t cblength,
                   char *cbencoding,
                   void *cbuserdata),
                  void *userdata,
                  int dont_decode
)
{

        char *decoded;
        size_t bytes_decoded = 0;

        /* Some encodings aren't really encodings */
        if ( (!strcasecmp(encoding, "7bit"))
           || (!strcasecmp(encoding, "8bit"))
           || (!strcasecmp(encoding, "binary")) ) {
                strcpy(encoding, "");
        }

        /* If this part is not encoded, send as-is */
        if ( (strlen(encoding) == 0) || (dont_decode)) {
                if (CallBack != NULL) {
                        CallBack(name, filename, fixed_partnum(partnum),
                                disposition, part_start,
                                content_type, length, encoding, userdata);
                }
                return;
        }

        /* Only these two encodings can be decoded here */
        if ((strcasecmp(encoding, "base64"))
            && (strcasecmp(encoding, "quoted-printable"))) {
                return;
        }

        /*
         * Allocate a buffer for the decoded data.  Base64 output is never
         * larger than its input, but the quoted-printable decoder appends
         * CRLF to every line, so LF-only input can nearly double in size
         * (up to 2 * length + 2 bytes).  Size the buffer for that worst
         * case; the extra 2048 bytes are the slack the code always had.
         */
        if (length > ((((size_t) -1) - 2048) / 2)) {
                return;         /* size computation would overflow */
        }
        decoded = mallok((length * 2) + 2048);
        if (decoded == NULL) {
                return;
        }

        if (!strcasecmp(encoding, "base64")) {
                bytes_decoded = CtdlDecodeBase64(decoded, part_start, length);
        }
        else if (!strcasecmp(encoding, "quoted-printable")) {
                bytes_decoded = CtdlDecodeQuotedPrintable(decoded,
                                                        part_start, length);
        }

        if ((bytes_decoded > 0) && (CallBack != NULL)) {
                CallBack(name, filename, fixed_partnum(partnum),
                        disposition, decoded,
                        content_type, bytes_decoded, "binary", userdata);
        }

        phree(decoded);
}
219
220 /*
221  * Break out the components of a multipart message
222  * (This function expects to be fed HEADERS + CONTENT)
223  * Note: NULL can be supplied as content_end; in this case, the message is
224  * considered to have ended when the parser encounters a 0x00 byte.
225  */
226 void the_mime_parser(char *partnum,
227                      char *content_start, char *content_end,
228                      void (*CallBack)
229                       (char *cbname,
230                        char *cbfilename,
231                        char *cbpartnum,
232                        char *cbdisp,
233                        void *cbcontent,
234                        char *cbtype,
235                        size_t cblength,
236                        char *cbencoding,
237                        void *cbuserdata),
238                      void (*PreMultiPartCallBack)
239                       (char *cbname,
240                        char *cbfilename,
241                        char *cbpartnum,
242                        char *cbdisp,
243                        void *cbcontent,
244                        char *cbtype,
245                        size_t cblength,
246                        char *cbencoding,
247                        void *cbuserdata),
248                      void (*PostMultiPartCallBack)
249                       (char *cbname,
250                        char *cbfilename,
251                        char *cbpartnum,
252                        char *cbdisp,
253                        void *cbcontent,
254                        char *cbtype,
255                        size_t cblength,
256                        char *cbencoding,
257                        void *cbuserdata),
258                       void *userdata,
259                       int dont_decode
260 )
261 {
262
263         char *ptr;
264         char *part_start, *part_end = NULL;
265         char buf[SIZ];
266         char *header;
267         char *boundary;
268         char *startary;
269         char *endary;
270         char *content_type;
271         size_t content_length;
272         char *encoding;
273         char *disposition;
274         char *name = NULL;
275         char *content_type_name;
276         char *content_disposition_name;
277         char *filename;
278         int is_multipart;
279         int part_seq = 0;
280         int i;
281         size_t length;
282         char nested_partnum[SIZ];
283
284         ptr = content_start;
285         content_length = 0;
286
287         boundary = mallok(SIZ);
288         memset(boundary, 0, SIZ);
289
290         startary = mallok(SIZ);
291         memset(startary, 0, SIZ);
292
293         endary = mallok(SIZ);
294         memset(endary, 0, SIZ);
295
296         header = mallok(SIZ);
297         memset(header, 0, SIZ);
298
299         content_type = mallok(SIZ);
300         memset(content_type, 0, SIZ);
301
302         encoding = mallok(SIZ);
303         memset(encoding, 0, SIZ);
304
305         content_type_name = mallok(SIZ);
306         memset(content_type_name, 0, SIZ);
307
308         content_disposition_name = mallok(SIZ);
309         memset(content_disposition_name, 0, SIZ);
310
311         filename = mallok(SIZ);
312         memset(filename, 0, SIZ);
313
314         disposition = mallok(SIZ);
315         memset(disposition, 0, SIZ);
316
317         /* If the caller didn't supply an endpointer, generate one by measure */
318         if (content_end == NULL) {
319                 content_end = &content_start[strlen(content_start)];
320         }
321
322         /* Learn interesting things from the headers */
323         strcpy(header, "");
324         do {
325                 ptr = memreadline(ptr, buf, SIZ);
326                 if (ptr >= content_end) {
327                         goto end_parser;
328                 }
329
330                 for (i = 0; i < strlen(buf); ++i)
331                         if (isspace(buf[i]))
332                                 buf[i] = ' ';
333                 if (!isspace(buf[0])) {
334                         if (!strncasecmp(header, "Content-type: ", 14)) {
335                                 strcpy(content_type, &header[14]);
336                                 extract_key(content_type_name, content_type, "name");
337                                 /* Deal with weird headers */
338                                 if (strchr(content_type, ' '))
339                                         *(strchr(content_type, ' ')) = '\0';
340                                 if (strchr(content_type, ';'))
341                                         *(strchr(content_type, ';')) = '\0';
342                         }
343                         if (!strncasecmp(header, "Content-Disposition: ", 21)) {
344                                 strcpy(disposition, &header[21]);
345                                 extract_key(content_disposition_name, disposition, "name");
346                                 extract_key(filename, disposition, "filename");
347                         }
348                         if (!strncasecmp(header, "Content-length: ", 16)) {
349                                 content_length = (size_t) atol(&header[16]);
350                         }
351                         if (!strncasecmp(header,
352                                       "Content-transfer-encoding: ", 27))
353                                 strcpy(encoding, &header[27]);
354                         if (strlen(boundary) == 0)
355                                 extract_key(boundary, header, "boundary");
356                         strcpy(header, "");
357                 }
358                 if ((strlen(header) + strlen(buf) + 2) < SIZ)
359                         strcat(header, buf);
360         } while ((strlen(buf) > 0) && (*ptr != 0));
361
362         if (strchr(disposition, ';'))
363                 *(strchr(disposition, ';')) = '\0';
364         striplt(disposition);
365         if (strchr(content_type, ';'))
366                 *(strchr(content_type, ';')) = '\0';
367         striplt(content_type);
368
369         if (strlen(boundary) > 0) {
370                 is_multipart = 1;
371         } else {
372                 is_multipart = 0;
373         }
374
375         /* If this is a multipart message, then recursively process it */
376         part_start = NULL;
377         if (is_multipart) {
378
379                 /* Tell the client about this message's multipartedness */
380                 if (PreMultiPartCallBack != NULL) {
381                         PreMultiPartCallBack("", "", partnum, "",
382                                 NULL, content_type,
383                                 0, encoding, userdata);
384                 }
385
386                 /* Figure out where the boundaries are */
387                 snprintf(startary, SIZ, "--%s", boundary);
388                 snprintf(endary, SIZ, "--%s--", boundary);
389                 do {
390                         if ( (!strncasecmp(ptr, startary, strlen(startary)))
391                            || (!strncasecmp(ptr, endary, strlen(endary))) ) {
392                                 if (part_start != NULL) {
393                                         if (strlen(partnum) > 0) {
394                                                 snprintf(nested_partnum,
395                                                          sizeof nested_partnum,
396                                                          "%s.%d", partnum,
397                                                          ++part_seq);
398                                         }
399                                         else {
400                                                 snprintf(nested_partnum,
401                                                          sizeof nested_partnum,
402                                                          "%d", ++part_seq);
403                                         }
404                                         the_mime_parser(nested_partnum,
405                                                     part_start, part_end,
406                                                         CallBack,
407                                                         PreMultiPartCallBack,
408                                                         PostMultiPartCallBack,
409                                                         userdata,
410                                                         dont_decode);
411                                 }
412                                 ptr = memreadline(ptr, buf, SIZ);
413                                 part_start = ptr;
414                         }
415                         else {
416                                 part_end = ptr;
417                                 ++ptr;
418                         }
419                         /* If we pass out of scope in the MIME multipart (by
420                          * hitting the end boundary), force the pointer out
421                          * of scope so this loop ends.
422                          */
423                         if (ptr < content_end) {
424                                 if (!strcasecmp(ptr, endary)) {
425                                         ptr = content_end++;
426                                 }
427                         }
428                 } while (ptr <= content_end);
429                 if (PostMultiPartCallBack != NULL) {
430                         PostMultiPartCallBack("", "", partnum, "", NULL,
431                                 content_type, 0, encoding, userdata);
432                 }
433                 goto end_parser;
434         }
435
436         /* If it's not a multipart message, then do something with it */
437         if (!is_multipart) {
438                 part_start = ptr;
439                 length = 0;
440                 while (ptr < content_end) {
441                         ++ptr;
442                         ++length;
443                 }
444                 part_end = content_end;
445                 /* fix an off-by-one error */
446                 --part_end;
447                 --length;
448                 
449                 /* Truncate if the header told us to */
450                 if ( (content_length > 0) && (length > content_length) ) {
451                         length = content_length;
452                 }
453
454                 /* Sometimes the "name" field is tacked on to Content-type,
455                  * and sometimes it's tacked on to Content-disposition.  Use
456                  * whichever one we have.
457                  */
458                 if (strlen(content_disposition_name) > strlen(content_type_name)) {
459                         name = content_disposition_name;
460                 }
461                 else {
462                         name = content_type_name;
463                 }
464                 
465                 mime_decode(partnum,
466                             part_start, length,
467                             content_type, encoding, disposition,
468                             name, filename,
469                             CallBack, NULL, NULL,
470                             userdata, dont_decode);
471         }
472
473 end_parser:     /* free the buffers!  end the oppression!! */
474         phree(boundary);
475         phree(startary);
476         phree(endary);  
477         phree(header);
478         phree(content_type);
479         phree(encoding);
480         phree(content_type_name);
481         phree(content_disposition_name);
482         phree(filename);
483         phree(disposition);
484 }
485
486
487
/*
 * Public entry point for the MIME parser.
 *
 * content_start must point at HEADERS + CONTENT.  content_end may be NULL,
 * in which case the message is considered to end at the first 0x00 byte.
 * The three callbacks, userdata and dont_decode are handed to
 * the_mime_parser() untouched; the top-level entity is given the empty
 * part number "".
 */
void mime_parser(char *content_start,
                 char *content_end,
                 void (*CallBack)(char *cbname, char *cbfilename,
                                  char *cbpartnum, char *cbdisp,
                                  void *cbcontent, char *cbtype,
                                  size_t cblength, char *cbencoding,
                                  void *cbuserdata),
                 void (*PreMultiPartCallBack)(char *cbname, char *cbfilename,
                                  char *cbpartnum, char *cbdisp,
                                  void *cbcontent, char *cbtype,
                                  size_t cblength, char *cbencoding,
                                  void *cbuserdata),
                 void (*PostMultiPartCallBack)(char *cbname, char *cbfilename,
                                  char *cbpartnum, char *cbdisp,
                                  void *cbcontent, char *cbtype,
                                  size_t cblength, char *cbencoding,
                                  void *cbuserdata),
                 void *userdata,
                 int dont_decode)
{
        the_mime_parser("",
                        content_start, content_end,
                        CallBack, PreMultiPartCallBack, PostMultiPartCallBack,
                        userdata, dont_decode);
}
541