* Worked out the remaining bugs in IMAP FETCH for the BODYSTRUCTURE and
[citadel.git] / citadel / mime_parser.c
1 /*
2  * $Id$
3  *
4  * This is a really bad attempt at writing a parser to handle MIME-encoded
5  * messages.
6  *
7  * Copyright (c) 1998-1999 by Art Cancro
8  * This code is distributed under the terms of the GNU General Public License.
9  *
10  */
11
12 #include "sysdep.h"
13 #include <stdlib.h>
14 #include <unistd.h>
15 #include <stdio.h>
16 #include <signal.h>
17 #include <sys/types.h>
18 #include <ctype.h>
19 #include <string.h>
20 #include <sys/stat.h>
21 #include <errno.h>
22 #include "citadel.h"
23 #include "mime_parser.h"
24 #include "sysdep_decls.h"
25 #include "server.h"
26
27
28
/*
 * Extract the value of a "key=value" parameter from a header string.
 *
 * target  receives the NUL-terminated value, or "" if the key is not found.
 *         It must be large enough to hold the value (a buffer as large as
 *         source is always sufficient).  target may alias source.
 * source  the header text to search; it is not modified unless it aliases
 *         target.
 * key     parameter name, matched case-insensitively and immediately
 *         followed by '='.
 *
 * A value that starts with a double quote runs up to the closing quote.
 * An unquoted value runs up to the next ';' or '"', so that trailing
 * parameters (e.g. "name=foo.txt; charset=us-ascii") are not swallowed.
 *
 * Note: the key is matched at any offset, so searching for "name" will also
 * match the tail of "filename=".
 */
void extract_key(char *target, char *source, char *key)
{
        size_t keylen = strlen(key);
        size_t srclen = strlen(source);
        size_t vallen;
        size_t a;
        char *value;

        for (a = 0; (a + keylen) < srclen; ++a) {
                if ((source[a + keylen] != '=')
                    || (strncasecmp(&source[a], key, keylen) != 0))
                        continue;

                value = &source[a + keylen + 1];
                if (*value == '"') {
                        ++value;        /* skip the opening quote */
                        vallen = strcspn(value, "\"");
                } else {
                        vallen = strcspn(value, ";\"");
                }

                /* memmove, not strcpy: target is allowed to overlap source */
                memmove(target, value, vallen);
                target[vallen] = 0;
                return;
        }
        target[0] = 0;
}
48
49
50
/*
 * Utility function to "readline" from memory.
 *
 * Copies one line, starting at 'start', into 'buf'.  A line ends at a
 * linefeed (10) or at a NUL byte.  At most maxlen-1 characters are stored;
 * the remainder of an over-long line is consumed but discarded.  If the last
 * stored character is a carriage return (13) it is removed.  'buf' is zeroed
 * first, so the result is always NUL-terminated when maxlen > 0.
 *
 * Returns a pointer to the byte just past the terminator that was consumed.
 * NOTE(review): when the line ended on a NUL byte, the returned pointer is
 * therefore one past that NUL; a caller that dereferences it is reading
 * beyond the terminator.
 */
char *memreadline(char *start, char *buf, int maxlen)
{
        char *ptr = start;
        char ch;
        int len = 0;            /* tracked here instead of calling strlen() */

        if (maxlen > 0)
                memset(buf, 0, (size_t) maxlen);

        for (;;) {
                ch = *ptr++;
                if ((ch == 10) || (ch == 0))
                        break;
                if (len < (maxlen - 1))
                        buf[len++] = ch;
        }

        /* Strip the CR of a CRLF line ending */
        if ((len > 0) && (buf[len - 1] == 13))
                buf[len - 1] = 0;

        return ptr;
}
77
78
/*
 * Callbacks for non-multipart messages still expect a part number, so an
 * absent (NULL) or empty part number is reported as "1".  Any other value
 * is handed back unchanged.
 */
char *fixed_partnum(char *supplied_partnum) {
        int have_partnum;

        have_partnum = (supplied_partnum != NULL)
                    && (supplied_partnum[0] != '\0');

        return have_partnum ? supplied_partnum : "1";
}
88
89
/*
 * Given a message or message-part body and a length, handle any necessary
 * decoding and pass the request up the stack.
 *
 * partnum       part number of this part; NULL or "" is reported to the
 *               callback as "1"
 * part_start    first byte of the (still encoded) body
 * length        number of body bytes
 * content_type, disposition, name, filename
 *               passed through to the callback unchanged
 * encoding      Content-transfer-encoding value.  MODIFIED IN PLACE: "7bit",
 *               "8bit" and "binary" are replaced by "".
 * CallBack      receives the part; may be NULL
 * PreMultiPartCallBack, PostMultiPartCallBack
 *               not used by this function
 * userdata      opaque pointer handed to the callback
 * dont_decode   if nonzero, the body is passed to the callback still encoded
 *
 * base64 and quoted-printable bodies are decoded by piping them through the
 * external helpers ./base64 and ./qpdecode.  The decoded data is delivered
 * to the callback with the encoding reported as "binary"; the callback is
 * not invoked when no decoded bytes were received.  Any other non-empty
 * encoding is logged and the part is dropped.
 *
 * NOTE(review): the decoder child is never waited for here; presumably
 * SIGCHLD is dealt with elsewhere in the server -- confirm.
 */
void mime_decode(char *partnum,
                 char *part_start, size_t length,
                 char *content_type, char *encoding,
                 char *disposition,
                 char *name, char *filename,
                 void (*CallBack)
                  (char *cbname,
                   char *cbfilename,
                   char *cbpartnum,
                   char *cbdisp,
                   void *cbcontent,
                   char *cbtype,
                   size_t cblength,
                   char *cbencoding,
                   void *cbuserdata),
                 void (*PreMultiPartCallBack)
                  (char *cbname,
                   char *cbfilename,
                   char *cbpartnum,
                   char *cbdisp,
                   void *cbcontent,
                   char *cbtype,
                   size_t cblength,
                   char *cbencoding,
                   void *cbuserdata),
                 void (*PostMultiPartCallBack)
                  (char *cbname,
                   char *cbfilename,
                   char *cbpartnum,
                   char *cbdisp,
                   void *cbcontent,
                   char *cbtype,
                   size_t cblength,
                   char *cbencoding,
                   void *cbuserdata),
                  void *userdata,
                  int dont_decode
)
{

        char *decoded;
        size_t decoded_size;
        struct stat statbuf;
        int sendpipe[2];
        int recvpipe[2];
        int childpid;
        size_t bytes_sent = 0;
        size_t bytes_recv = 0;
        size_t blocksize;
        ssize_t nbytes;         /* signed, so read()/write() errors are visible */
        int write_error = 0;

        lprintf(9, "mime_decode() called\n");

        /* Some encodings aren't really encodings */
        if (!strcasecmp(encoding, "7bit"))
                strcpy(encoding, "");
        if (!strcasecmp(encoding, "8bit"))
                strcpy(encoding, "");
        if (!strcasecmp(encoding, "binary"))
                strcpy(encoding, "");

        /* If this part is not encoded, send as-is */
        if ((strlen(encoding) == 0) || (dont_decode)) {
                if (CallBack != NULL) {
                        CallBack(name, filename, fixed_partnum(partnum),
                                disposition, part_start,
                                content_type, length, encoding, userdata);
                }
                return;
        }
        if ((strcasecmp(encoding, "base64"))
            && (strcasecmp(encoding, "quoted-printable"))) {
                lprintf(5, "ERROR: unknown MIME encoding '%s'\n", encoding);
                return;
        }

        /*
         * Allocate a buffer for the decoded data.  The output buffer is the
         * same size as the input buffer plus some padding; this assumes that
         * the decoded data will never be larger than the encoded data, which
         * holds for base64 and quoted-printable.  Reads below are still
         * bounded by decoded_size in case a decoder misbehaves.
         */
        decoded_size = length + 1024;
        decoded = mallok(decoded_size);
        if (decoded == NULL) {
                lprintf(5, "ERROR: cannot allocate memory.\n");
                return;
        }
        if (pipe(sendpipe) != 0) {
                lprintf(5, "ERROR: cannot create pipe: %s\n", strerror(errno));
                phree(decoded);
                return;
        }
        if (pipe(recvpipe) != 0) {
                lprintf(5, "ERROR: cannot create pipe: %s\n", strerror(errno));
                close(sendpipe[0]);
                close(sendpipe[1]);
                phree(decoded);
                return;
        }

        childpid = fork();
        if (childpid < 0) {
                lprintf(5, "ERROR: cannot fork: %s\n", strerror(errno));
                close(sendpipe[0]);
                close(sendpipe[1]);
                close(recvpipe[0]);
                close(recvpipe[1]);
                phree(decoded);
                return;
        }
        if (childpid == 0) {
                close(2);
                /* send stdio to the pipes */
                if (dup2(sendpipe[0], 0) < 0)
                        lprintf(5, "ERROR dup2()\n");
                if (dup2(recvpipe[1], 1) < 0)
                        lprintf(5, "ERROR dup2()\n");
                close(sendpipe[1]);     /* Close the ends we're not using */
                close(recvpipe[0]);
                if (!strcasecmp(encoding, "base64"))
                        execlp("./base64", "base64", "-d", (char *) NULL);
                else if (!strcasecmp(encoding, "quoted-printable"))
                        execlp("./qpdecode", "qpdecode", (char *) NULL);
                lprintf(5, "ERROR: cannot exec decoder for %s\n", encoding);
                /* _exit: do not run the parent's exit handlers in the child */
                _exit(1);
        }
        close(sendpipe[0]);     /* Close the ends we're not using  */
        close(recvpipe[1]);

        while ((bytes_sent < length) && (write_error == 0)) {
                /*
                 * Empty the input pipe FIRST.
                 * NOTE(review): this relies on fstat() reporting the number
                 * of bytes waiting in a pipe, which is not portable; where
                 * st_size is always 0 nothing is drained here and a large
                 * part could block both processes -- confirm on the target
                 * platform.
                 */
                while ((bytes_recv < decoded_size)
                       && (fstat(recvpipe[0], &statbuf) == 0)
                       && (statbuf.st_size > 0)) {
                        blocksize = (size_t) statbuf.st_size;
                        if (blocksize > (decoded_size - bytes_recv))
                                blocksize = decoded_size - bytes_recv;
                        nbytes = read(recvpipe[0], &decoded[bytes_recv],
                                      blocksize);
                        if (nbytes < 0) {
                                lprintf(5, "ERROR: cannot read from pipe\n");
                                break;
                        }
                        if (nbytes == 0)
                                break;
                        bytes_recv = bytes_recv + (size_t) nbytes;
                }

                /* Then put some data into the output pipe */
                blocksize = length - bytes_sent;
                if (blocksize > 2048)
                        blocksize = 2048;
                nbytes = write(sendpipe[1], &part_start[bytes_sent], blocksize);
                if (nbytes < 0) {
                        if (errno == EINTR)
                                continue;
                        lprintf(5, "ERROR: cannot write to pipe: %s\n",
                                strerror(errno));
                        write_error = 1;
                } else {
                        /* count only what was really written (short writes) */
                        bytes_sent = bytes_sent + (size_t) nbytes;
                }
        }
        close(sendpipe[1]);     /* EOF tells the decoder to finish up */

        /* Empty the input pipe */
        while (bytes_recv < decoded_size) {
                nbytes = read(recvpipe[0], &decoded[bytes_recv],
                              decoded_size - bytes_recv);
                if (nbytes > 0) {
                        bytes_recv = bytes_recv + (size_t) nbytes;
                } else if (nbytes == 0) {
                        break;          /* decoder closed its output */
                } else if (errno != EINTR) {
                        lprintf(5, "ERROR: cannot read from pipe\n");
                        break;
                }
        }
        close(recvpipe[0]);

        if ((bytes_recv > 0) && (CallBack != NULL)) {
                CallBack(name, filename, fixed_partnum(partnum),
                        disposition, decoded,
                        content_type, bytes_recv, "binary", userdata);
        }

        phree(decoded);
}
245
246 /*
247  * Break out the components of a multipart message
248  * (This function expects to be fed HEADERS + CONTENT)
249  * Note: NULL can be supplied as content_end; in this case, the message is
250  * considered to have ended when the parser encounters a 0x00 byte.
251  */
252 void the_mime_parser(char *partnum,
253                      char *content_start, char *content_end,
254                      void (*CallBack)
255                       (char *cbname,
256                        char *cbfilename,
257                        char *cbpartnum,
258                        char *cbdisp,
259                        void *cbcontent,
260                        char *cbtype,
261                        size_t cblength,
262                        char *cbencoding,
263                        void *cbuserdata),
264                      void (*PreMultiPartCallBack)
265                       (char *cbname,
266                        char *cbfilename,
267                        char *cbpartnum,
268                        char *cbdisp,
269                        void *cbcontent,
270                        char *cbtype,
271                        size_t cblength,
272                        char *cbencoding,
273                        void *cbuserdata),
274                      void (*PostMultiPartCallBack)
275                       (char *cbname,
276                        char *cbfilename,
277                        char *cbpartnum,
278                        char *cbdisp,
279                        void *cbcontent,
280                        char *cbtype,
281                        size_t cblength,
282                        char *cbencoding,
283                        void *cbuserdata),
284                       void *userdata,
285                       int dont_decode
286 )
287 {
288
289         char *ptr;
290         char *part_start, *part_end;
291         char buf[SIZ];
292         char header[SIZ];
293         char boundary[SIZ];
294         char startary[SIZ];
295         char endary[SIZ];
296         char content_type[SIZ];
297         char encoding[SIZ];
298         char disposition[SIZ];
299         char name[SIZ];
300         char filename[SIZ];
301         int is_multipart;
302         int part_seq = 0;
303         int i;
304         size_t length;
305         char nested_partnum[SIZ];
306
307         lprintf(9, "the_mime_parser() called\n");
308         ptr = content_start;
309         memset(boundary, 0, sizeof boundary);
310         memset(content_type, 0, sizeof content_type);
311         memset(encoding, 0, sizeof encoding);
312         memset(name, 0, sizeof name);
313         memset(filename, 0, sizeof filename);
314         memset(disposition, 0, sizeof disposition);
315
316         /* Learn interesting things from the headers */
317         strcpy(header, "");
318         do {
319                 ptr = memreadline(ptr, buf, sizeof buf);
320                 if (*ptr == 0)
321                         return; /* premature end of message */
322                 if (content_end != NULL)
323                         if (ptr >= content_end)
324                                 return;
325
326                 for (i = 0; i < strlen(buf); ++i)
327                         if (isspace(buf[i]))
328                                 buf[i] = ' ';
329                 if (!isspace(buf[0])) {
330                         if (!strncasecmp(header, "Content-type: ", 14)) {
331                                 strcpy(content_type, &header[14]);
332                                 extract_key(name, content_type, "name");
333                         }
334                         if (!strncasecmp(header, "Content-Disposition: ", 21)) {
335                                 strcpy(disposition, &header[21]);
336                                 extract_key(filename, disposition, "filename");
337                         }
338                         if (!strncasecmp(header,
339                                       "Content-transfer-encoding: ", 27))
340                                 strcpy(encoding, &header[27]);
341                         if (strlen(boundary) == 0)
342                                 extract_key(boundary, header, "boundary");
343                         strcpy(header, "");
344                 }
345                 if ((strlen(header) + strlen(buf) + 2) < sizeof(header))
346                         strcat(header, buf);
347         } while ((strlen(buf) > 0) && (*ptr != 0));
348
349         for (i = 0; i < strlen(disposition); ++i)
350                 if (disposition[i] == ';')
351                         disposition[i] = 0;
352         while (isspace(disposition[0]))
353                 strcpy(disposition, &disposition[1]);
354         for (i = 0; i < strlen(content_type); ++i)
355                 if (content_type[i] == ';')
356                         content_type[i] = 0;
357         while (isspace(content_type[0]))
358                 strcpy(content_type, &content_type[1]);
359
360         if (strlen(boundary) > 0) {
361                 is_multipart = 1;
362         } else {
363                 is_multipart = 0;
364         }
365
366         /* If this is a multipart message, then recursively process it */
367         part_start = NULL;
368         if (is_multipart) {
369
370                 /* Tell the client about this message's multipartedness */
371                 if (PreMultiPartCallBack != NULL) {
372                         PreMultiPartCallBack("", "", partnum, "",
373                                 NULL, content_type,
374                                 0, encoding, userdata);
375                 }
376                 /*
377                 if (CallBack != NULL) {
378                         CallBack("", "", fixed_partnum(partnum),
379                                 "", NULL, content_type,
380                                 0, encoding, userdata);
381                 }
382                  */
383
384                 /* Figure out where the boundaries are */
385                 sprintf(startary, "--%s", boundary);
386                 sprintf(endary, "--%s--", boundary);
387                 do {
388                         part_end = ptr;
389                         ptr = memreadline(ptr, buf, sizeof buf);
390                         if (*ptr == 0) goto END_MULTI;  /* premature end */
391                         if (content_end != NULL)
392                                 if (ptr >= content_end) goto END_MULTI;
393                         if ((!strcasecmp(buf, startary))
394                             || (!strcasecmp(buf, endary))) {
395                                 if (part_start != NULL) {
396                                         if (strlen(partnum) > 0) {
397                                                 sprintf(nested_partnum, "%s.%d",
398                                                         partnum, ++part_seq);
399                                         }
400                                         else {
401                                                 sprintf(nested_partnum, "%d",
402                                                         ++part_seq);
403                                         }
404                                         the_mime_parser(nested_partnum,
405                                                     part_start, part_end,
406                                                         CallBack,
407                                                         PreMultiPartCallBack,
408                                                         PostMultiPartCallBack,
409                                                         userdata,
410                                                         dont_decode);
411                                 }
412                                 part_start = ptr;
413                         }
414                 } while (strcasecmp(buf, endary));
415 END_MULTI:      if (PostMultiPartCallBack != NULL) {
416                         PostMultiPartCallBack("", "", partnum, "", NULL,
417                                 content_type, 0, encoding, userdata);
418                 }
419                 return;
420         }
421
422         /* If it's not a multipart message, then do something with it */
423         if (!is_multipart) {
424                 part_start = ptr;
425                 length = 0;
426                 while ((*ptr != 0)
427                       && ((content_end == NULL) || (ptr < content_end))) {
428                         ++length;
429                         part_end = ptr++;
430                 }
431                 mime_decode(partnum,
432                             part_start, length,
433                             content_type, encoding, disposition,
434                             name, filename,
435                             CallBack, NULL, NULL,
436                             userdata, dont_decode);
437         }
438
439
440 }
441
442
443
/*
 * Public entry point for the MIME parser.
 *
 * The data between content_start and content_end must consist of HEADERS
 * followed by CONTENT.  content_end may be NULL, in which case the message
 * is considered to end at the first 0x00 byte.
 *
 * This simply starts the_mime_parser() at the top level, i.e. with an empty
 * part number, handing all callbacks, userdata and dont_decode through
 * unchanged.
 */
void mime_parser(char *content_start,
                 char *content_end,
                 void (*CallBack)(char *cbname,
                                  char *cbfilename,
                                  char *cbpartnum,
                                  char *cbdisp,
                                  void *cbcontent,
                                  char *cbtype,
                                  size_t cblength,
                                  char *cbencoding,
                                  void *cbuserdata),
                 void (*PreMultiPartCallBack)(char *cbname,
                                              char *cbfilename,
                                              char *cbpartnum,
                                              char *cbdisp,
                                              void *cbcontent,
                                              char *cbtype,
                                              size_t cblength,
                                              char *cbencoding,
                                              void *cbuserdata),
                 void (*PostMultiPartCallBack)(char *cbname,
                                               char *cbfilename,
                                               char *cbpartnum,
                                               char *cbdisp,
                                               void *cbcontent,
                                               char *cbtype,
                                               size_t cblength,
                                               char *cbencoding,
                                               void *cbuserdata),
                 void *userdata,
                 int dont_decode)
{
        char *top_level_partnum = "";   /* the outermost entity has no number */

        lprintf(9, "mime_parser() called\n");

        the_mime_parser(top_level_partnum,
                        content_start, content_end,
                        CallBack,
                        PreMultiPartCallBack,
                        PostMultiPartCallBack,
                        userdata,
                        dont_decode);
}