]> code.citadel.org Git - citadel.git/blob - citadel/mime_parser.c
* Fixed bug in mime_parser.c that caused parts to be dropped when the last
[citadel.git] / citadel / mime_parser.c
1 /*
2  * $Id$
3  *
4  * This is a really bad attempt at writing a parser to handle MIME-encoded
5  * messages.
6  *
7  * Copyright (c) 1998-1999 by Art Cancro
8  * This code is distributed under the terms of the GNU General Public License.
9  *
10  */
11
#include "sysdep.h"
#include <stdlib.h>
#include <unistd.h>
#include <stdio.h>
#include <signal.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <ctype.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <poll.h>
#include "citadel.h"
#include "mime_parser.h"
#include "sysdep_decls.h"
#include "server.h"
26
27
28
/*
 * Pull the value of a "key=value" parameter out of a header string.
 *
 * target  receives the result; it must be able to hold a full copy of
 *         'source', because the source is first copied into it
 * source  the header text to search (not modified)
 * key     the parameter name, compared without regard to case
 *
 * The first position at which 'key' is immediately followed by '=' wins.
 * One leading double quote is dropped from the value and the value is cut
 * at the next double quote, if there is one.  An unquoted value runs to the
 * end of the string.  If the key is not found, target is set to "".
 */
void extract_key(char *target, char *source, char *key)
{
        size_t keylen = strlen(key);
        char *scan;
        char *value;
        char *quote;

        strcpy(target, source);
        for (scan = target; *scan != 0; ++scan) {
                if ((!strncasecmp(scan, key, keylen))
                    && (scan[keylen] == '=')) {
                        value = scan + keylen + 1;
                        if (*value == '"')
                                ++value;
                        /*
                         * The value lives inside target itself, so the
                         * regions overlap: memmove() is required here,
                         * strcpy() would be undefined behaviour.
                         */
                        memmove(target, value, strlen(value) + 1);
                        quote = strchr(target, '"');
                        if (quote != NULL)
                                *quote = 0;
                        return;
                }
        }
        target[0] = 0;
}
48
49
50
/*
 * Utility function to "readline" from memory.
 *
 * Copies one line, beginning at 'start', into 'buf'.  The line ends at the
 * first LF (10) or NUL byte; CR (13) and LF bytes are never stored.  At most
 * maxlen-1 characters are kept (the rest of an over-long line is consumed
 * but discarded) and buf is always NUL-terminated when maxlen > 0.
 *
 * Returns a pointer to the byte that follows the LF or NUL which ended the
 * line.  NOTE: when the line was ended by a NUL, the returned pointer is
 * therefore one byte PAST that NUL; callers must check how the line ended
 * (or use their own end pointer) before dereferencing it.
 */
char *memreadline(char *start, char *buf, int maxlen)
{
        char ch;
        char *ptr = start;
        int len = 0;    /* number of characters stored in buf so far */

        if (maxlen > 0)
                memset(buf, 0, maxlen);

        for (;;) {
                ch = *ptr++;
                if ((len < (maxlen - 1)) && (ch != 13) && (ch != 10)) {
                        /*
                         * Index with our own tally instead of calling
                         * strlen(buf) for every character.  The buffer
                         * was zeroed above, so it stays terminated.
                         */
                        buf[len++] = ch;
                }
                if ((ch == 10) || (ch == 0)) {
                        return ptr;
                }
        }
}
76
77
/*
 * Callbacks expect a part number even for messages that are not multipart.
 * When the caller has none (NULL or an empty string), substitute the
 * constant "1"; otherwise hand back the supplied string untouched.
 */
char *fixed_partnum(char *supplied_partnum) {
        int have_partnum;

        have_partnum = (supplied_partnum != NULL)
                    && (strlen(supplied_partnum) != 0);
        return have_partnum ? supplied_partnum : "1";
}
87
88
/*
 * Given a message or message-part body and a length, handle any necessary
 * decoding and pass the request up the stack.
 *
 * partnum       part number to report; NULL or "" is reported as "1"
 * part_start    first byte of the (possibly encoded) body
 * length        number of bytes in the body
 * content_type, disposition, name, filename
 *               passed through to the callback unchanged
 * encoding      Content-transfer-encoding value.  "7bit", "8bit" and
 *               "binary" are rewritten IN PLACE to "" (caller's buffer is
 *               modified).  "base64" and "quoted-printable" are decoded;
 *               anything else is logged and the part is dropped.
 * CallBack      receives the part; may be NULL
 * PreMultiPartCallBack, PostMultiPartCallBack
 *               accepted for signature symmetry, not used here
 * userdata      opaque pointer handed to the callback
 * dont_decode   nonzero: deliver the body as-is even if it is encoded
 *
 * Decoding is done by piping the body through an external helper program
 * ("./base64 -d" or "./qpdecode") run in a child process.  Decoded parts are
 * reported with encoding "binary"; nothing is reported if the decoder
 * produced no output.
 *
 * NOTE(review): if the decoder exits early, writing to its pipe raises
 * SIGPIPE in this process; whether SIGPIPE is ignored is decided elsewhere.
 */
void mime_decode(char *partnum,
                 char *part_start, size_t length,
                 char *content_type, char *encoding,
                 char *disposition,
                 char *name, char *filename,
                 void (*CallBack)
                  (char *cbname,
                   char *cbfilename,
                   char *cbpartnum,
                   char *cbdisp,
                   void *cbcontent,
                   char *cbtype,
                   size_t cblength,
                   char *cbencoding,
                   void *cbuserdata),
                 void (*PreMultiPartCallBack)
                  (char *cbname,
                   char *cbfilename,
                   char *cbpartnum,
                   char *cbdisp,
                   void *cbcontent,
                   char *cbtype,
                   size_t cblength,
                   char *cbencoding,
                   void *cbuserdata),
                 void (*PostMultiPartCallBack)
                  (char *cbname,
                   char *cbfilename,
                   char *cbpartnum,
                   char *cbdisp,
                   void *cbcontent,
                   char *cbtype,
                   size_t cblength,
                   char *cbencoding,
                   void *cbuserdata),
                  void *userdata,
                  int dont_decode
)
{

        char *decoded;
        size_t capacity;
        int sendpipe[2];
        int recvpipe[2];
        int send_fd;            /* write end to the decoder, -1 once closed */
        int recv_fd;            /* read end from the decoder */
        int flags;
        pid_t childpid;
        size_t bytes_sent = 0;
        size_t bytes_recv = 0;
        size_t chunk;
        ssize_t moved;          /* signed, so that -1 from read/write is seen */
        struct pollfd fds[2];

        lprintf(9, "mime_decode() called\n");

        /* Some encodings aren't really encodings */
        if ((!strcasecmp(encoding, "7bit"))
            || (!strcasecmp(encoding, "8bit"))
            || (!strcasecmp(encoding, "binary")))
                strcpy(encoding, "");

        /* If this part is not encoded, send as-is */
        if ((strlen(encoding) == 0) || (dont_decode)) {
                if (CallBack != NULL) {
                        CallBack(name, filename, fixed_partnum(partnum),
                                disposition, part_start,
                                content_type, length, encoding, userdata);
                }
                return;
        }
        if ((strcasecmp(encoding, "base64"))
            && (strcasecmp(encoding, "quoted-printable"))) {
                lprintf(5, "ERROR: unknown MIME encoding '%s'\n", encoding);
                return;
        }
        /*
         * Allocate a buffer for the decoded data.  The output buffer is the
         * same size as the input buffer; this assumes that the decoded data
         * will never be larger than the encoded data.  Just to be safe, we
         * still pad the buffer a bit, and every read below is limited to
         * the space that is left.
         */
        capacity = length + 1024;
        decoded = mallok(capacity);
        if (decoded == NULL) {
                lprintf(5, "ERROR: cannot allocate memory.\n");
                return;
        }
        if (pipe(sendpipe) != 0) {
                lprintf(5, "ERROR: cannot create pipe: %s\n", strerror(errno));
                phree(decoded);
                return;
        }
        if (pipe(recvpipe) != 0) {
                lprintf(5, "ERROR: cannot create pipe: %s\n", strerror(errno));
                close(sendpipe[0]);
                close(sendpipe[1]);
                phree(decoded);
                return;
        }

        childpid = fork();
        if (childpid < 0) {
                lprintf(5, "ERROR: cannot fork: %s\n", strerror(errno));
                close(sendpipe[0]);
                close(sendpipe[1]);
                close(recvpipe[0]);
                close(recvpipe[1]);
                phree(decoded);
                return;
        }
        if (childpid == 0) {
                close(2);
                /* send stdio to the pipes */
                if (dup2(sendpipe[0], 0) < 0)
                        lprintf(5, "ERROR dup2()\n");
                if (dup2(recvpipe[1], 1) < 0)
                        lprintf(5, "ERROR dup2()\n");
                close(sendpipe[1]);     /* Close the ends we're not using */
                close(recvpipe[0]);
                if (!strcasecmp(encoding, "base64"))
                        execlp("./base64", "base64", "-d", (char *) NULL);
                else if (!strcasecmp(encoding, "quoted-printable"))
                        execlp("./qpdecode", "qpdecode", (char *) NULL);
                lprintf(5, "ERROR: cannot exec decoder for %s\n", encoding);
                /* _exit(): do not run the parent's atexit handlers here */
                _exit(1);
        }
        close(sendpipe[0]);     /* Close the ends we're not using  */
        close(recvpipe[1]);
        send_fd = sendpipe[1];
        recv_fd = recvpipe[0];

        /*
         * Writes must never block: if we slept in write() while the decoder
         * was itself blocked writing its output to us, both processes would
         * wait on each other forever.
         */
        flags = fcntl(send_fd, F_GETFL, 0);
        if (flags >= 0)
                fcntl(send_fd, F_SETFL, flags | O_NONBLOCK);

        /* Feed the decoder and collect its output until it signals EOF */
        for (;;) {
                if ((send_fd >= 0) && (bytes_sent >= length)) {
                        /* All input delivered; EOF tells decoder to finish */
                        close(send_fd);
                        send_fd = -1;
                }

                fds[0].fd = recv_fd;
                fds[0].events = POLLIN;
                fds[0].revents = 0;
                fds[1].fd = send_fd;    /* poll() ignores negative fds */
                fds[1].events = POLLOUT;
                fds[1].revents = 0;
                if (poll(fds, 2, -1) < 0) {
                        if (errno == EINTR)
                                continue;
                        lprintf(5, "ERROR: cannot poll pipes: %s\n",
                                strerror(errno));
                        break;
                }

                /* Empty the input pipe FIRST */
                if (fds[0].revents != 0) {
                        if (bytes_recv >= capacity) {
                                lprintf(5, "ERROR: decoded data too large\n");
                                break;
                        }
                        moved = read(recv_fd, &decoded[bytes_recv],
                                     capacity - bytes_recv);
                        if (moved > 0) {
                                bytes_recv = bytes_recv + (size_t) moved;
                        } else if (moved == 0) {
                                break;  /* decoder closed its output */
                        } else if ((errno != EINTR) && (errno != EAGAIN)) {
                                lprintf(5, "ERROR: cannot read from pipe\n");
                                break;
                        }
                }

                /* Then put some data into the output pipe */
                if ((send_fd >= 0) && (fds[1].revents != 0)) {
                        chunk = length - bytes_sent;
                        if (chunk > 2048)
                                chunk = 2048;
                        moved = write(send_fd, &part_start[bytes_sent], chunk);
                        if (moved >= 0) {
                                bytes_sent = bytes_sent + (size_t) moved;
                        } else if ((errno != EINTR) && (errno != EAGAIN)) {
                                lprintf(5, "ERROR: cannot write to pipe: %s\n",
                                        strerror(errno));
                                /* Give up sending, keep draining output */
                                close(send_fd);
                                send_fd = -1;
                        }
                }
        }

        if (send_fd >= 0)
                close(send_fd);
        close(recv_fd);

        /* Reap the decoder so it does not linger as a zombie */
        while ((waitpid(childpid, NULL, 0) < 0) && (errno == EINTR))
                ;

        if ((bytes_recv > 0) && (CallBack != NULL)) {
                CallBack(name, filename, fixed_partnum(partnum),
                        disposition, decoded,
                        content_type, bytes_recv, "binary", userdata);
        }

        phree(decoded);
}
244
245 /*
246  * Break out the components of a multipart message
247  * (This function expects to be fed HEADERS + CONTENT)
248  * Note: NULL can be supplied as content_end; in this case, the message is
249  * considered to have ended when the parser encounters a 0x00 byte.
250  */
251 void the_mime_parser(char *partnum,
252                      char *content_start, char *content_end,
253                      void (*CallBack)
254                       (char *cbname,
255                        char *cbfilename,
256                        char *cbpartnum,
257                        char *cbdisp,
258                        void *cbcontent,
259                        char *cbtype,
260                        size_t cblength,
261                        char *cbencoding,
262                        void *cbuserdata),
263                      void (*PreMultiPartCallBack)
264                       (char *cbname,
265                        char *cbfilename,
266                        char *cbpartnum,
267                        char *cbdisp,
268                        void *cbcontent,
269                        char *cbtype,
270                        size_t cblength,
271                        char *cbencoding,
272                        void *cbuserdata),
273                      void (*PostMultiPartCallBack)
274                       (char *cbname,
275                        char *cbfilename,
276                        char *cbpartnum,
277                        char *cbdisp,
278                        void *cbcontent,
279                        char *cbtype,
280                        size_t cblength,
281                        char *cbencoding,
282                        void *cbuserdata),
283                       void *userdata,
284                       int dont_decode
285 )
286 {
287
288         char *ptr;
289         char *part_start, *part_end;
290         char buf[SIZ];
291         char header[SIZ];
292         char boundary[SIZ];
293         char startary[SIZ];
294         char endary[SIZ];
295         char content_type[SIZ];
296         char encoding[SIZ];
297         char disposition[SIZ];
298         char name[SIZ];
299         char filename[SIZ];
300         int is_multipart;
301         int part_seq = 0;
302         int i;
303         size_t length;
304         char nested_partnum[SIZ];
305
306         lprintf(9, "the_mime_parser() called\n");
307         ptr = content_start;
308         memset(boundary, 0, sizeof boundary);
309         memset(content_type, 0, sizeof content_type);
310         memset(encoding, 0, sizeof encoding);
311         memset(name, 0, sizeof name);
312         memset(filename, 0, sizeof filename);
313         memset(disposition, 0, sizeof disposition);
314
315         /* Learn interesting things from the headers */
316         strcpy(header, "");
317         do {
318                 ptr = memreadline(ptr, buf, sizeof buf);
319                 if (*ptr == 0)
320                         return; /* premature end of message */
321                 if (content_end != NULL)
322                         if (ptr >= content_end)
323                                 return;
324
325                 for (i = 0; i < strlen(buf); ++i)
326                         if (isspace(buf[i]))
327                                 buf[i] = ' ';
328                 if (!isspace(buf[0])) {
329                         if (!strncasecmp(header, "Content-type: ", 14)) {
330                                 strcpy(content_type, &header[14]);
331                                 extract_key(name, content_type, "name");
332                         }
333                         if (!strncasecmp(header, "Content-Disposition: ", 21)) {
334                                 strcpy(disposition, &header[21]);
335                                 extract_key(filename, disposition, "filename");
336                         }
337                         if (!strncasecmp(header,
338                                       "Content-transfer-encoding: ", 27))
339                                 strcpy(encoding, &header[27]);
340                         if (strlen(boundary) == 0)
341                                 extract_key(boundary, header, "boundary");
342                         strcpy(header, "");
343                 }
344                 if ((strlen(header) + strlen(buf) + 2) < sizeof(header))
345                         strcat(header, buf);
346         } while ((strlen(buf) > 0) && (*ptr != 0));
347
348         for (i = 0; i < strlen(disposition); ++i)
349                 if (disposition[i] == ';')
350                         disposition[i] = 0;
351         while (isspace(disposition[0]))
352                 strcpy(disposition, &disposition[1]);
353         for (i = 0; i < strlen(content_type); ++i)
354                 if (content_type[i] == ';')
355                         content_type[i] = 0;
356         while (isspace(content_type[0]))
357                 strcpy(content_type, &content_type[1]);
358
359         if (strlen(boundary) > 0) {
360                 is_multipart = 1;
361         } else {
362                 is_multipart = 0;
363         }
364
365         /* If this is a multipart message, then recursively process it */
366         part_start = NULL;
367         if (is_multipart) {
368
369                 /* Tell the client about this message's multipartedness */
370                 if (PreMultiPartCallBack != NULL) {
371                         PreMultiPartCallBack("", "", partnum, "",
372                                 NULL, content_type,
373                                 0, encoding, userdata);
374                 }
375                 /*
376                 if (CallBack != NULL) {
377                         CallBack("", "", fixed_partnum(partnum),
378                                 "", NULL, content_type,
379                                 0, encoding, userdata);
380                 }
381                  */
382
383                 /* Figure out where the boundaries are */
384                 sprintf(startary, "--%s", boundary);
385                 sprintf(endary, "--%s--", boundary);
386                 do {
387                         part_end = ptr;
388                         ptr = memreadline(ptr, buf, sizeof buf);
389                         if (content_end != NULL)
390                                 if (ptr >= content_end) goto END_MULTI;
391
392                         if ( (!strcasecmp(buf, startary))
393                            || (!strcasecmp(buf, endary)) ) {
394                                 if (part_start != NULL) {
395                                         if (strlen(partnum) > 0) {
396                                                 sprintf(nested_partnum, "%s.%d",
397                                                         partnum, ++part_seq);
398                                         }
399                                         else {
400                                                 sprintf(nested_partnum, "%d",
401                                                         ++part_seq);
402                                         }
403                                         the_mime_parser(nested_partnum,
404                                                     part_start, part_end,
405                                                         CallBack,
406                                                         PreMultiPartCallBack,
407                                                         PostMultiPartCallBack,
408                                                         userdata,
409                                                         dont_decode);
410                                 }
411                                 part_start = ptr;
412                         }
413                 } while ( (strcasecmp(buf, endary)) && (ptr != 0) );
414 END_MULTI:      if (PostMultiPartCallBack != NULL) {
415                         PostMultiPartCallBack("", "", partnum, "", NULL,
416                                 content_type, 0, encoding, userdata);
417                 }
418                 return;
419         }
420
421         /* If it's not a multipart message, then do something with it */
422         if (!is_multipart) {
423                 part_start = ptr;
424                 length = 0;
425                 while ((*ptr != 0)
426                       && ((content_end == NULL) || (ptr < content_end))) {
427                         ++length;
428                         part_end = ptr++;
429                 }
430                 mime_decode(partnum,
431                             part_start, length,
432                             content_type, encoding, disposition,
433                             name, filename,
434                             CallBack, NULL, NULL,
435                             userdata, dont_decode);
436         }
437
438
439 }
440
441
442
/*
 * Entry point for the MIME parser.
 *
 * Feed it a complete entity (HEADERS + CONTENT).  content_end may be NULL,
 * in which case the message is taken to end at the first 0x00 byte.
 * All remaining arguments are forwarded unchanged to the_mime_parser(),
 * which is started with an empty part number for the top-level entity.
 */
void mime_parser(char *content_start, char *content_end,
                 void (*CallBack)(char *cbname, char *cbfilename,
                                  char *cbpartnum, char *cbdisp,
                                  void *cbcontent, char *cbtype,
                                  size_t cblength, char *cbencoding,
                                  void *cbuserdata),
                 void (*PreMultiPartCallBack)(char *cbname, char *cbfilename,
                                  char *cbpartnum, char *cbdisp,
                                  void *cbcontent, char *cbtype,
                                  size_t cblength, char *cbencoding,
                                  void *cbuserdata),
                 void (*PostMultiPartCallBack)(char *cbname, char *cbfilename,
                                  char *cbpartnum, char *cbdisp,
                                  void *cbcontent, char *cbtype,
                                  size_t cblength, char *cbencoding,
                                  void *cbuserdata),
                 void *userdata,
                 int dont_decode)
{
        lprintf(9, "mime_parser() called\n");

        /* The top-level entity has no part number of its own */
        the_mime_parser("",
                        content_start, content_end,
                        CallBack,
                        PreMultiPartCallBack, PostMultiPartCallBack,
                        userdata,
                        dont_decode);
}