* More tweaks to the MIME parser
[citadel.git] / citadel / mime_parser.c
/*
 * $Id$
 *
 * This is the MIME parser for Citadel.  Sometimes it actually works.
 *
 * Copyright (c) 1998-2001 by Art Cancro
 * This code is distributed under the terms of the GNU General Public License.
 *
 */
10
#include <stdlib.h>
#include <unistd.h>
#include <stdio.h>
#include <signal.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <ctype.h>
#include <string.h>
#include <errno.h>
#include <poll.h>
#include "citadel.h"
#include "sysdep_decls.h"
#include "mime_parser.h"
#include "tools.h"
24
25
/*
 * Extract the value of a "key=value" parameter from a header string.
 *
 * target  receives the value, or an empty string when the key is not found.
 *         It must be large enough to hold a full copy of source.
 * source  the header text to search.
 * key     the parameter name.  It is matched case-insensitively and must be
 *         followed immediately by '='.
 *
 * One leading double quote is dropped and the value is cut at the next
 * double quote, if there is one.  An unquoted value runs to the end of the
 * string.
 */
void extract_key(char *target, char *source, char *key)
{
	size_t keylen = strlen(key);
	size_t a;
	char *value;
	char *quote;

	/*
	 * memmove() rather than strcpy(): the shifts below copy within the
	 * same buffer, and strcpy() on overlapping regions is undefined.
	 */
	memmove(target, source, strlen(source) + 1);

	for (a = 0; target[a] != 0; ++a) {
		if ((!strncasecmp(&target[a], key, keylen))
		    && (target[a + keylen] == '=')) {
			value = &target[a + keylen + 1];
			if (*value == '"')
				++value;	/* skip the opening quote */
			memmove(target, value, strlen(value) + 1);

			/* Cut the value at the closing quote, if any */
			quote = strchr(target, '"');
			if (quote != NULL)
				*quote = 0;
			return;
		}
	}
	target[0] = 0;
}
45
46
/*
 * Callbacks always expect a part number, but a non-multipart message has
 * none.  Hand back the caller's part number when it has one, and the
 * stand-in "1" when it is NULL or empty.
 */
char *fixed_partnum(char *supplied_partnum) {
	if ((supplied_partnum != NULL) && (supplied_partnum[0] != 0)) {
		return supplied_partnum;
	}
	return "1";
}
56
57
/*
 * Push `length` bytes of `input` into a decoder child through `to_child`
 * while collecting everything the child writes to `from_child` into
 * `output` (at most `capacity` bytes).
 *
 * Both directions are multiplexed with poll() so that neither side can
 * stall the other: the child may block writing its output while we are
 * still feeding it input.  `to_child` is always closed before returning;
 * `from_child` is left open for the caller to close.
 *
 * Returns the number of decoded bytes stored in `output`.
 */
static size_t pump_decoder(int to_child, int from_child,
			   char *input, size_t length,
			   char *output, size_t capacity)
{
	/*
	 * Write in small pieces: POSIX only guarantees that PIPE_BUF is at
	 * least 512, and a larger blocking write could stall even after
	 * poll() reported the pipe writable.
	 */
	enum { DECODER_CHUNK = 512 };
	struct pollfd fds[2];
	size_t bytes_sent = 0;
	size_t bytes_recv = 0;
	size_t room;
	size_t blocksize;
	ssize_t n;
	int writer_open = 1;
	char spill;

	for (;;) {
		/* Once all input is sent, close our end so the child sees EOF */
		if (writer_open && (bytes_sent >= length)) {
			close(to_child);
			writer_open = 0;
		}

		fds[0].fd = from_child;
		fds[0].events = POLLIN;
		fds[0].revents = 0;
		fds[1].fd = to_child;
		fds[1].events = POLLOUT;
		fds[1].revents = 0;

		if (poll(fds, writer_open ? 2 : 1, -1) < 0) {
			if (errno == EINTR)
				continue;
			lprintf(9, "ERROR: cannot poll pipes: %s\n",
				strerror(errno));
			break;
		}

		/* Collect whatever the decoder has produced so far */
		if (fds[0].revents & POLLNVAL)
			break;
		if (fds[0].revents & (POLLIN | POLLHUP | POLLERR)) {
			room = capacity - bytes_recv;
			if (room > 0)
				n = read(from_child, &output[bytes_recv], room);
			else	/* buffer full: probe for EOF vs. more data */
				n = read(from_child, &spill, 1);
			if (n < 0) {
				if (errno == EINTR)
					continue;
				lprintf(9, "ERROR: cannot read from pipe\n");
				break;
			}
			if (n == 0)
				break;		/* decoder closed its output */
			if (room == 0) {
				lprintf(9, "ERROR: decoded data does not fit in buffer\n");
				break;
			}
			bytes_recv += (size_t) n;
		}

		if ((!writer_open) || (fds[1].revents == 0))
			continue;

		/* The decoder went away; writing now would only fail */
		if (fds[1].revents & (POLLERR | POLLHUP | POLLNVAL)) {
			lprintf(9, "ERROR: cannot write to pipe: decoder closed its input\n");
			close(to_child);
			writer_open = 0;
			continue;
		}

		/* Then put some data into the output pipe */
		blocksize = length - bytes_sent;
		if (blocksize > DECODER_CHUNK)
			blocksize = DECODER_CHUNK;
		n = write(to_child, &input[bytes_sent], blocksize);
		if (n < 0) {
			if (errno == EINTR)
				continue;
			lprintf(9, "ERROR: cannot write to pipe: %s\n",
				strerror(errno));
			close(to_child);
			writer_open = 0;
		} else {
			/* Count only what was really accepted */
			bytes_sent += (size_t) n;
		}
	}

	if (writer_open)
		close(to_child);
	return bytes_recv;
}


/*
 * Given a message or message-part body and a length, handle any necessary
 * decoding and pass the request up the stack.
 *
 * partnum       part number reported to the callback ("1" is substituted
 *               when it is NULL or empty).
 * part_start    start of the (possibly encoded) body.
 * length        number of bytes at part_start.
 * content_type, disposition, name, filename
 *               passed through to the callback unchanged.
 * encoding      Content-transfer-encoding.  "7bit", "8bit" and "binary" are
 *               rewritten IN PLACE to the empty string.
 * CallBack      receives the body; may be NULL.
 * PreMultiPartCallBack, PostMultiPartCallBack
 *               accepted for signature symmetry; not used here.
 * userdata      opaque pointer handed to the callback.
 * dont_decode   when nonzero the body is delivered still encoded.
 *
 * Bodies with an empty encoding (or when dont_decode is set) are handed to
 * the callback as-is.  base64 and quoted-printable bodies are piped through
 * the external "./base64 -d" / "./qpdecode" helpers and the result is
 * delivered with the encoding reported as "binary"; the decoded buffer is
 * only valid for the duration of the callback.  Any other encoding is
 * logged and dropped.
 */
void mime_decode(char *partnum,
		 char *part_start, size_t length,
		 char *content_type, char *encoding,
		 char *disposition,
		 char *name, char *filename,
		 void (*CallBack)
		  (char *cbname,
		   char *cbfilename,
		   char *cbpartnum,
		   char *cbdisp,
		   void *cbcontent,
		   char *cbtype,
		   size_t cblength,
		   char *cbencoding,
		   void *cbuserdata),
		 void (*PreMultiPartCallBack)
		  (char *cbname,
		   char *cbfilename,
		   char *cbpartnum,
		   char *cbdisp,
		   void *cbcontent,
		   char *cbtype,
		   size_t cblength,
		   char *cbencoding,
		   void *cbuserdata),
		 void (*PostMultiPartCallBack)
		  (char *cbname,
		   char *cbfilename,
		   char *cbpartnum,
		   char *cbdisp,
		   void *cbcontent,
		   char *cbtype,
		   size_t cblength,
		   char *cbencoding,
		   void *cbuserdata),
		  void *userdata,
		  int dont_decode
)
{

	char *decoded;
	size_t capacity;
	int sendpipe[2];
	int recvpipe[2];
	pid_t childpid;
	size_t bytes_recv;

	lprintf(9, "mime_decode() called\n");

	/* Some encodings aren't really encodings */
	if (!strcasecmp(encoding, "7bit"))
		strcpy(encoding, "");
	if (!strcasecmp(encoding, "8bit"))
		strcpy(encoding, "");
	if (!strcasecmp(encoding, "binary"))
		strcpy(encoding, "");

	/* If this part is not encoded, send as-is */
	if ( (strlen(encoding) == 0) || (dont_decode)) {
		if (CallBack != NULL) {
			CallBack(name, filename, fixed_partnum(partnum),
				disposition, part_start,
				content_type, length, encoding, userdata);
			}
		return;
	}
	if ((strcasecmp(encoding, "base64"))
	    && (strcasecmp(encoding, "quoted-printable"))) {
		lprintf(9, "ERROR: unknown MIME encoding '%s'\n", encoding);
		return;
	}
	/*
	 * Allocate a buffer for the decoded data.  The output buffer is the
	 * same size as the input buffer; this assumes that the decoded data
	 * will never be larger than the encoded data.  This is a safe
	 * assumption with base64, uuencode, and quoted-printable.  Just to
	 * be safe, we still pad the buffer a bit (and pump_decoder() refuses
	 * to write past the end of it).
	 */
	capacity = length + 1024;
	decoded = malloc(capacity);
	if (decoded == NULL) {
		lprintf(9, "ERROR: cannot allocate memory.\n");
		return;
	}

	/* From here on, every early exit must release what was acquired */
	if (pipe(sendpipe) != 0) {
		free(decoded);
		return;
	}
	if (pipe(recvpipe) != 0) {
		close(sendpipe[0]);
		close(sendpipe[1]);
		free(decoded);
		return;
	}

	childpid = fork();
	if (childpid < 0) {
		close(sendpipe[0]);
		close(sendpipe[1]);
		close(recvpipe[0]);
		close(recvpipe[1]);
		free(decoded);
		return;
	}
	if (childpid == 0) {
		close(2);
		/* send stdio to the pipes */
		if (dup2(sendpipe[0], 0) < 0)
			lprintf(9, "ERROR dup2()\n");
		if (dup2(recvpipe[1], 1) < 0)
			lprintf(9, "ERROR dup2()\n");
		close(sendpipe[1]);     /* Close the ends we're not using */
		close(recvpipe[0]);
		if (!strcasecmp(encoding, "base64"))
			execlp("./base64", "base64", "-d", NULL);
		else if (!strcasecmp(encoding, "quoted-printable"))
			execlp("./qpdecode", "qpdecode", NULL);
		lprintf(9, "ERROR: cannot exec decoder for %s\n", encoding);
		/* _exit(): do not flush stdio buffers inherited from the parent */
		_exit(1);
	}
	close(sendpipe[0]);     /* Close the ends we're not using  */
	close(recvpipe[1]);

	/* Exchange data with the decoder; this closes sendpipe[1] for us */
	bytes_recv = pump_decoder(sendpipe[1], recvpipe[0],
				  part_start, length, decoded, capacity);
	close(recvpipe[0]);

	/* Reap the decoder so it does not linger as a zombie */
	while ((waitpid(childpid, NULL, 0) < 0) && (errno == EINTR))
		;

	if ((bytes_recv > 0) && (CallBack != NULL)) {
		CallBack(name, filename, fixed_partnum(partnum),
			disposition, decoded,
			content_type, bytes_recv, "binary", userdata);
	}

	free(decoded);
}
213
214 /*
215  * Break out the components of a multipart message
216  * (This function expects to be fed HEADERS + CONTENT)
217  * Note: NULL can be supplied as content_end; in this case, the message is
218  * considered to have ended when the parser encounters a 0x00 byte.
219  */
220 void the_mime_parser(char *partnum,
221                      char *content_start, char *content_end,
222                      void (*CallBack)
223                       (char *cbname,
224                        char *cbfilename,
225                        char *cbpartnum,
226                        char *cbdisp,
227                        void *cbcontent,
228                        char *cbtype,
229                        size_t cblength,
230                        char *cbencoding,
231                        void *cbuserdata),
232                      void (*PreMultiPartCallBack)
233                       (char *cbname,
234                        char *cbfilename,
235                        char *cbpartnum,
236                        char *cbdisp,
237                        void *cbcontent,
238                        char *cbtype,
239                        size_t cblength,
240                        char *cbencoding,
241                        void *cbuserdata),
242                      void (*PostMultiPartCallBack)
243                       (char *cbname,
244                        char *cbfilename,
245                        char *cbpartnum,
246                        char *cbdisp,
247                        void *cbcontent,
248                        char *cbtype,
249                        size_t cblength,
250                        char *cbencoding,
251                        void *cbuserdata),
252                       void *userdata,
253                       int dont_decode
254 )
255 {
256
257         char *ptr;
258         char *part_start, *part_end = NULL;
259         char buf[SIZ];
260         char header[SIZ];
261         char boundary[SIZ];
262         char startary[SIZ];
263         char endary[SIZ];
264         char content_type[SIZ];
265         size_t content_length;
266         char encoding[SIZ];
267         char disposition[SIZ];
268         char name[SIZ];
269         char filename[SIZ];
270         int is_multipart;
271         int part_seq = 0;
272         int i;
273         size_t length;
274         char nested_partnum[SIZ];
275
276         lprintf(9, "the_mime_parser() called\n");
277         ptr = content_start;
278         memset(boundary, 0, sizeof boundary);
279         memset(content_type, 0, sizeof content_type);
280         memset(encoding, 0, sizeof encoding);
281         memset(name, 0, sizeof name);
282         memset(filename, 0, sizeof filename);
283         memset(disposition, 0, sizeof disposition);
284         content_length = 0;
285
286         /* If the caller didn't supply an endpointer, generate one by measure */
287         if (content_end == NULL) {
288                 content_end = &content_start[strlen(content_start)];
289         }
290
291         /* Learn interesting things from the headers */
292         strcpy(header, "");
293         do {
294                 ptr = memreadline(ptr, buf, sizeof buf);
295                 if (ptr >= content_end)
296                         return;
297
298                 for (i = 0; i < strlen(buf); ++i)
299                         if (isspace(buf[i]))
300                                 buf[i] = ' ';
301                 if (!isspace(buf[0])) {
302                         if (!strncasecmp(header, "Content-type: ", 14)) {
303                                 strcpy(content_type, &header[14]);
304                                 extract_key(name, content_type, "name");
305                         }
306                         if (!strncasecmp(header, "Content-Disposition: ", 21)) {
307                                 strcpy(disposition, &header[21]);
308                                 extract_key(filename, disposition, "filename");
309                         }
310                         if (!strncasecmp(header, "Content-length: ", 16)) {
311                                 content_length = (size_t) atol(&header[16]);
312                         }
313                         if (!strncasecmp(header,
314                                       "Content-transfer-encoding: ", 27))
315                                 strcpy(encoding, &header[27]);
316                         if (strlen(boundary) == 0)
317                                 extract_key(boundary, header, "boundary");
318                         strcpy(header, "");
319                 }
320                 if ((strlen(header) + strlen(buf) + 2) < sizeof(header))
321                         strcat(header, buf);
322         } while ((strlen(buf) > 0) && (*ptr != 0));
323
324         for (i = 0; i < strlen(disposition); ++i)
325                 if (disposition[i] == ';')
326                         disposition[i] = 0;
327         while (isspace(disposition[0]))
328                 strcpy(disposition, &disposition[1]);
329         for (i = 0; i < strlen(content_type); ++i)
330                 if (content_type[i] == ';')
331                         content_type[i] = 0;
332         while (isspace(content_type[0]))
333                 strcpy(content_type, &content_type[1]);
334
335         if (strlen(boundary) > 0) {
336                 is_multipart = 1;
337         } else {
338                 is_multipart = 0;
339         }
340
341         lprintf(9, "is_multipart=%d, boundary=<%s>\n",
342                 is_multipart, boundary);
343
344         /* If this is a multipart message, then recursively process it */
345         part_start = NULL;
346         if (is_multipart) {
347
348                 /* Tell the client about this message's multipartedness */
349                 if (PreMultiPartCallBack != NULL) {
350                         PreMultiPartCallBack("", "", partnum, "",
351                                 NULL, content_type,
352                                 0, encoding, userdata);
353                 }
354
355                 /* Figure out where the boundaries are */
356                 sprintf(startary, "--%s", boundary);
357                 sprintf(endary, "--%s--", boundary);
358                 do {
359                         if ( (!strncasecmp(ptr, startary, strlen(startary)))
360                            || (!strncasecmp(ptr, endary, strlen(endary))) ) {
361                                 lprintf(9, "hit boundary!\n");
362                                 if (part_start != NULL) {
363                                         if (strlen(partnum) > 0) {
364                                                 sprintf(nested_partnum, "%s.%d",
365                                                         partnum, ++part_seq);
366                                         }
367                                         else {
368                                                 sprintf(nested_partnum, "%d",
369                                                         ++part_seq);
370                                         }
371                                         the_mime_parser(nested_partnum,
372                                                     part_start, part_end,
373                                                         CallBack,
374                                                         PreMultiPartCallBack,
375                                                         PostMultiPartCallBack,
376                                                         userdata,
377                                                         dont_decode);
378                                 }
379                                 ptr = memreadline(ptr, buf, sizeof(buf));
380                                 part_start = ptr;
381                         }
382                         else {
383                                 part_end = ptr;
384                                 ++ptr;
385                         }
386                 } while ( (strcasecmp(ptr, endary)) && (ptr <= content_end) );
387                 if (PostMultiPartCallBack != NULL) {
388                         PostMultiPartCallBack("", "", partnum, "", NULL,
389                                 content_type, 0, encoding, userdata);
390                 }
391                 return;
392         }
393
394         /* If it's not a multipart message, then do something with it */
395         if (!is_multipart) {
396                 lprintf(9, "doing non-multipart thing\n");
397                 part_start = ptr;
398                 length = 0;
399                 while (ptr < content_end) {
400                         ++ptr;
401                         ++length;
402                 }
403                 part_end = content_end;
404                 
405                 /* Truncate if the header told us to */
406                 if ( (content_length > 0) && (length > content_length) ) {
407                         length = content_length;
408                         lprintf(9, "truncated to %d\n", content_length);
409                 }
410                 
411                 mime_decode(partnum,
412                             part_start, length,
413                             content_type, encoding, disposition,
414                             name, filename,
415                             CallBack, NULL, NULL,
416                             userdata, dont_decode);
417         }
418 }
419
420
421
/*
 * Public entry point for the MIME parser.
 *
 * The input must contain HEADERS + CONTENT.  content_end may be NULL, in
 * which case the message is taken to end at the first 0x00 byte.
 *
 * All arguments are forwarded untouched to the_mime_parser(), which is
 * started with an empty part number to mark the top level of the message.
 */
void mime_parser(char *content_start,
		char *content_end,

		 void (*CallBack)
		  (char *cbname, char *cbfilename, char *cbpartnum,
		   char *cbdisp, void *cbcontent, char *cbtype,
		   size_t cblength, char *cbencoding, void *cbuserdata),

		 void (*PreMultiPartCallBack)
		  (char *cbname, char *cbfilename, char *cbpartnum,
		   char *cbdisp, void *cbcontent, char *cbtype,
		   size_t cblength, char *cbencoding, void *cbuserdata),

		 void (*PostMultiPartCallBack)
		  (char *cbname, char *cbfilename, char *cbpartnum,
		   char *cbdisp, void *cbcontent, char *cbtype,
		   size_t cblength, char *cbencoding, void *cbuserdata),

		  void *userdata,
		  int dont_decode
)
{
	lprintf(9, "mime_parser() called\n");

	/* "" = no enclosing part: numbering starts at the top level */
	the_mime_parser("",
			content_start, content_end,
			CallBack, PreMultiPartCallBack, PostMultiPartCallBack,
			userdata, dont_decode);
}