94a508cb3d2b051d21f17313cb19fe3b33b87d5c
[citadel.git] / citadel / mime_parser.c
1 /*
2  * $Id$
3  *
4  * This is the MIME parser for Citadel.  Sometimes it actually works.
5  *
6  * Copyright (c) 1998-2001 by Art Cancro
7  * This code is distributed under the terms of the GNU General Public License.
8  *
9  */
10
11 #ifdef DLL_EXPORT
12 #define IN_LIBCIT
13 #endif
14
#include <stdlib.h>
#include <unistd.h>
#include <stdio.h>
#include <signal.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <ctype.h>
#include <string.h>
#include <fcntl.h>
#include <poll.h>
#include <errno.h>
#include "citadel.h"
#include "server.h"
#include "dynloader.h"
#include "sysdep_decls.h"
#include "mime_parser.h"
#include "tools.h"
30
31
/*
 * Extract the value of a "key=value" parameter from a header line.
 *
 * target  receives the extracted value; it must be large enough to hold
 *         everything in source that follows "key=".
 * source  the header text to search (not modified).
 * key     the parameter name; matched case-insensitively at any position
 *         in source, and it must be followed immediately by '='.
 *
 * One leading double quote is dropped from the value, and the value is
 * cut at the next double quote if there is one.  An unquoted value
 * therefore runs to the end of source.  If the key is not found, target
 * is set to the empty string.
 */
void extract_key(char *target, char *source, char *key)
{
        size_t keylen = strlen(key);
        char *scan;

        for (scan = source; *scan != 0; ++scan) {
                char *value;
                char *quote;

                if (strncasecmp(scan, key, keylen) != 0
                    || scan[keylen] != '=')
                        continue;

                value = scan + keylen + 1;
                if (*value == '"')
                        ++value;

                /* memmove, not strcpy: the regions may overlap */
                memmove(target, value, strlen(value) + 1);

                quote = strchr(target, '"');
                if (quote != NULL)
                        *quote = 0;
                return;
        }
        target[0] = 0;
}
51
52
/*
 * Callbacks always receive a part number.  When the caller has none to
 * offer (NULL or an empty string, as happens for non-multipart messages)
 * substitute the constant "1"; otherwise hand back the supplied string
 * unchanged.
 */
char *fixed_partnum(char *supplied_partnum) {
        if ((supplied_partnum != NULL) && (supplied_partnum[0] != 0)) {
                return supplied_partnum;
        }
        return "1";
}
62
63
/*
 * Given a message or message-part body and a length, handle any necessary
 * decoding and pass the request up the stack.
 *
 * partnum       part number string; NULL or "" is reported to the callback
 *               as "1" (see fixed_partnum()).
 * part_start    first byte of the (possibly encoded) body.
 * length        number of bytes at part_start.
 * content_type, disposition, name, filename
 *               passed through to the callback untouched.
 * encoding      Content-transfer-encoding value.  NOTE: modified in place;
 *               "7bit", "8bit" and "binary" are rewritten to "".
 * CallBack      invoked once with the body (may be NULL).
 * PreMultiPartCallBack, PostMultiPartCallBack
 *               accepted for signature symmetry; not used here.
 * userdata      opaque pointer handed to the callback.
 * dont_decode   if nonzero, the body is passed to the callback as-is.
 *
 * base64 and quoted-printable bodies are decoded by piping them through
 * the external programs ./base64 -d and ./qpdecode.  The decoded data is
 * handed to the callback with the encoding reported as "binary", and only
 * if at least one decoded byte was produced.  Any other non-empty encoding
 * is logged and the part is dropped.
 */
void mime_decode(char *partnum,
                 char *part_start, size_t length,
                 char *content_type, char *encoding,
                 char *disposition,
                 char *name, char *filename,
                 void (*CallBack)
                  (char *cbname,
                   char *cbfilename,
                   char *cbpartnum,
                   char *cbdisp,
                   void *cbcontent,
                   char *cbtype,
                   size_t cblength,
                   char *cbencoding,
                   void *cbuserdata),
                 void (*PreMultiPartCallBack)
                  (char *cbname,
                   char *cbfilename,
                   char *cbpartnum,
                   char *cbdisp,
                   void *cbcontent,
                   char *cbtype,
                   size_t cblength,
                   char *cbencoding,
                   void *cbuserdata),
                 void (*PostMultiPartCallBack)
                  (char *cbname,
                   char *cbfilename,
                   char *cbpartnum,
                   char *cbdisp,
                   void *cbcontent,
                   char *cbtype,
                   size_t cblength,
                   char *cbencoding,
                   void *cbuserdata),
                  void *userdata,
                  int dont_decode
)
{

        char *decoded;
        size_t capacity;
        int sendpipe[2];
        int recvpipe[2];
        pid_t childpid;
        size_t bytes_sent = 0;
        size_t bytes_recv = 0;
        size_t blocksize;
        ssize_t nbytes;
        int send_open = 1;
        int recv_open = 1;
        int flags;
        int nfds;
        int ready;
        struct pollfd fds[2];

        lprintf(9, "mime_decode() called\n");

        /* Some encodings aren't really encodings */
        if (!strcasecmp(encoding, "7bit"))
                strcpy(encoding, "");
        if (!strcasecmp(encoding, "8bit"))
                strcpy(encoding, "");
        if (!strcasecmp(encoding, "binary"))
                strcpy(encoding, "");

        /* If this part is not encoded, send as-is */
        if ( (strlen(encoding) == 0) || (dont_decode)) {
                if (CallBack != NULL) {
                        CallBack(name, filename, fixed_partnum(partnum),
                                disposition, part_start,
                                content_type, length, encoding, userdata);
                        }
                return;
        }
        if ((strcasecmp(encoding, "base64"))
            && (strcasecmp(encoding, "quoted-printable"))) {
                lprintf(9, "ERROR: unknown MIME encoding '%s'\n", encoding);
                return;
        }
        /*
         * Allocate a buffer for the decoded data.  The output buffer is the
         * same size as the input buffer; this assumes that the decoded data
         * will never be larger than the encoded data.  Just to be safe, we
         * still pad the buffer a bit -- and every read below is bounded by
         * 'capacity', so a misbehaving decoder cannot overrun it.
         */
        capacity = length + 1024;
        decoded = malloc(capacity);
        if (decoded == NULL) {
                lprintf(9, "ERROR: cannot allocate memory.\n");
                return;
        }
        if (pipe(sendpipe) != 0) {
                lprintf(9, "ERROR: cannot create pipe: %s\n",
                        strerror(errno));
                free(decoded);
                return;
        }
        if (pipe(recvpipe) != 0) {
                lprintf(9, "ERROR: cannot create pipe: %s\n",
                        strerror(errno));
                close(sendpipe[0]);
                close(sendpipe[1]);
                free(decoded);
                return;
        }

        childpid = fork();
        if (childpid < 0) {
                lprintf(9, "ERROR: cannot fork: %s\n", strerror(errno));
                close(sendpipe[0]);
                close(sendpipe[1]);
                close(recvpipe[0]);
                close(recvpipe[1]);
                free(decoded);
                return;
        }
        if (childpid == 0) {
                close(2);
                /* send stdio to the pipes */
                if (dup2(sendpipe[0], 0) < 0)
                        lprintf(9, "ERROR dup2()\n");
                if (dup2(recvpipe[1], 1) < 0)
                        lprintf(9, "ERROR dup2()\n");
                close(sendpipe[1]);     /* Close the ends we're not using */
                close(recvpipe[0]);
                if (!strcasecmp(encoding, "base64"))
                        execlp("./base64", "base64", "-d", NULL);
                else if (!strcasecmp(encoding, "quoted-printable"))
                        execlp("./qpdecode", "qpdecode", NULL);
                lprintf(9, "ERROR: cannot exec decoder for %s\n", encoding);
                /* _exit: don't run the parent's atexit handlers or flush
                 * its stdio buffers a second time from the child */
                _exit(1);
        }
        close(sendpipe[0]);     /* Close the ends we're not using  */
        close(recvpipe[1]);

        /*
         * Writes must never block: if we stalled on a full send pipe while
         * the decoder was stalled on a full receive pipe, neither side
         * would make progress.  If O_NONBLOCK cannot be set we carry on
         * with blocking writes.
         */
        flags = fcntl(sendpipe[1], F_GETFL, 0);
        if (flags >= 0)
                fcntl(sendpipe[1], F_SETFL, flags | O_NONBLOCK);

        /*
         * Feed the decoder and collect its output concurrently, letting
         * poll() tell us which pipe is ready.  (The size reported by
         * fstat() for a pipe is unspecified, so it cannot be used to tell
         * whether output is waiting.)  The loop ends when the decoder
         * closes its output.
         */
        while (recv_open) {

                /* Everything sent?  Give the decoder its EOF. */
                if (send_open && (bytes_sent >= length)) {
                        close(sendpipe[1]);
                        send_open = 0;
                }

                fds[0].fd = recvpipe[0];
                fds[0].events = POLLIN;
                fds[0].revents = 0;
                nfds = 1;
                if (send_open) {
                        fds[1].fd = sendpipe[1];
                        fds[1].events = POLLOUT;
                        fds[1].revents = 0;
                        nfds = 2;
                }

                ready = poll(fds, nfds, -1);
                if (ready < 0) {
                        if (errno == EINTR)
                                continue;
                        lprintf(9, "ERROR: poll() failed: %s\n",
                                strerror(errno));
                        break;
                }

                /* Empty the input pipe FIRST */
                if (fds[0].revents & (POLLIN | POLLHUP | POLLERR | POLLNVAL)) {
                        if (bytes_recv >= capacity) {
                                lprintf(9, "ERROR: decoded data does not "
                                        "fit in buffer; truncating\n");
                                break;
                        }
                        nbytes = read(recvpipe[0], &decoded[bytes_recv],
                                      capacity - bytes_recv);
                        if (nbytes > 0) {
                                bytes_recv = bytes_recv + (size_t) nbytes;
                        }
                        else if (nbytes == 0) {
                                recv_open = 0;          /* EOF */
                        }
                        else if (errno != EINTR) {
                                lprintf(9, "ERROR: cannot read from pipe\n");
                                recv_open = 0;
                        }
                }

                /* Then put some data into the output pipe */
                if (send_open
                   && (fds[1].revents & (POLLOUT | POLLHUP | POLLERR | POLLNVAL))) {
                        blocksize = length - bytes_sent;
                        if (blocksize > 2048)
                                blocksize = 2048;
                        nbytes = write(sendpipe[1], &part_start[bytes_sent],
                                       blocksize);
                        if (nbytes > 0) {
                                /* count only what was actually accepted */
                                bytes_sent = bytes_sent + (size_t) nbytes;
                        }
                        else if ((nbytes < 0) && (errno != EAGAIN)
                                && (errno != EINTR)) {
                                lprintf(9, "ERROR: cannot write to pipe: %s\n",
                                        strerror(errno));
                                close(sendpipe[1]);
                                send_open = 0;
                        }
                }
        }

        if (send_open)
                close(sendpipe[1]);
        close(recvpipe[0]);

        /*
         * Reap the decoder so it does not linger as a zombie.  A failure
         * other than EINTR (for example ECHILD because the child was
         * already reaped elsewhere) is harmless and ignored.
         */
        while ((waitpid(childpid, NULL, 0) < 0) && (errno == EINTR))
                ;

        if ((bytes_recv > 0) && (CallBack != NULL)) {
                CallBack(name, filename, fixed_partnum(partnum),
                        disposition, decoded,
                        content_type, bytes_recv, "binary", userdata);
        }

        free(decoded);
}
219
220 /*
221  * Break out the components of a multipart message
222  * (This function expects to be fed HEADERS + CONTENT)
223  * Note: NULL can be supplied as content_end; in this case, the message is
224  * considered to have ended when the parser encounters a 0x00 byte.
225  */
226 void the_mime_parser(char *partnum,
227                      char *content_start, char *content_end,
228                      void (*CallBack)
229                       (char *cbname,
230                        char *cbfilename,
231                        char *cbpartnum,
232                        char *cbdisp,
233                        void *cbcontent,
234                        char *cbtype,
235                        size_t cblength,
236                        char *cbencoding,
237                        void *cbuserdata),
238                      void (*PreMultiPartCallBack)
239                       (char *cbname,
240                        char *cbfilename,
241                        char *cbpartnum,
242                        char *cbdisp,
243                        void *cbcontent,
244                        char *cbtype,
245                        size_t cblength,
246                        char *cbencoding,
247                        void *cbuserdata),
248                      void (*PostMultiPartCallBack)
249                       (char *cbname,
250                        char *cbfilename,
251                        char *cbpartnum,
252                        char *cbdisp,
253                        void *cbcontent,
254                        char *cbtype,
255                        size_t cblength,
256                        char *cbencoding,
257                        void *cbuserdata),
258                       void *userdata,
259                       int dont_decode
260 )
261 {
262
263         char *ptr;
264         char *part_start, *part_end = NULL;
265         char buf[SIZ];
266         char header[SIZ];
267         char boundary[SIZ];
268         char startary[SIZ];
269         char endary[SIZ];
270         char content_type[SIZ];
271         size_t content_length;
272         char encoding[SIZ];
273         char disposition[SIZ];
274         char name[SIZ];
275         char filename[SIZ];
276         int is_multipart;
277         int part_seq = 0;
278         int i;
279         size_t length;
280         char nested_partnum[SIZ];
281
282         lprintf(9, "the_mime_parser() called\n");
283         ptr = content_start;
284         memset(boundary, 0, sizeof boundary);
285         memset(content_type, 0, sizeof content_type);
286         memset(encoding, 0, sizeof encoding);
287         memset(name, 0, sizeof name);
288         memset(filename, 0, sizeof filename);
289         memset(disposition, 0, sizeof disposition);
290         content_length = 0;
291
292         /* If the caller didn't supply an endpointer, generate one by measure */
293         if (content_end == NULL) {
294                 content_end = &content_start[strlen(content_start)];
295         }
296
297         /* Learn interesting things from the headers */
298         strcpy(header, "");
299         do {
300                 ptr = memreadline(ptr, buf, sizeof buf);
301                 if (ptr >= content_end)
302                         return;
303
304                 for (i = 0; i < strlen(buf); ++i)
305                         if (isspace(buf[i]))
306                                 buf[i] = ' ';
307                 if (!isspace(buf[0])) {
308                         if (!strncasecmp(header, "Content-type: ", 14)) {
309                                 strcpy(content_type, &header[14]);
310                                 extract_key(name, content_type, "name");
311                                 lprintf(9, "Extracted content-type <%s>\n",
312                                         content_type);
313                         }
314                         if (!strncasecmp(header, "Content-Disposition: ", 21)) {
315                                 strcpy(disposition, &header[21]);
316                                 extract_key(filename, disposition, "filename");
317                         }
318                         if (!strncasecmp(header, "Content-length: ", 16)) {
319                                 content_length = (size_t) atol(&header[16]);
320                         }
321                         if (!strncasecmp(header,
322                                       "Content-transfer-encoding: ", 27))
323                                 strcpy(encoding, &header[27]);
324                         if (strlen(boundary) == 0)
325                                 extract_key(boundary, header, "boundary");
326                         strcpy(header, "");
327                 }
328                 if ((strlen(header) + strlen(buf) + 2) < sizeof(header))
329                         strcat(header, buf);
330         } while ((strlen(buf) > 0) && (*ptr != 0));
331
332         for (i = 0; i < strlen(disposition); ++i)
333                 if (disposition[i] == ';')
334                         disposition[i] = 0;
335         while (isspace(disposition[0]))
336                 strcpy(disposition, &disposition[1]);
337         for (i = 0; i < strlen(content_type); ++i)
338                 if (content_type[i] == ';')
339                         content_type[i] = 0;
340         while (isspace(content_type[0]))
341                 strcpy(content_type, &content_type[1]);
342
343         if (strlen(boundary) > 0) {
344                 is_multipart = 1;
345         } else {
346                 is_multipart = 0;
347         }
348
349         lprintf(9, "is_multipart=%d, boundary=<%s>\n",
350                 is_multipart, boundary);
351
352         /* If this is a multipart message, then recursively process it */
353         part_start = NULL;
354         if (is_multipart) {
355
356                 /* Tell the client about this message's multipartedness */
357                 if (PreMultiPartCallBack != NULL) {
358                         PreMultiPartCallBack("", "", partnum, "",
359                                 NULL, content_type,
360                                 0, encoding, userdata);
361                 }
362
363                 /* Figure out where the boundaries are */
364                 sprintf(startary, "--%s", boundary);
365                 sprintf(endary, "--%s--", boundary);
366                 do {
367                         if ( (!strncasecmp(ptr, startary, strlen(startary)))
368                            || (!strncasecmp(ptr, endary, strlen(endary))) ) {
369                                 lprintf(9, "hit boundary!\n");
370                                 if (part_start != NULL) {
371                                         if (strlen(partnum) > 0) {
372                                                 sprintf(nested_partnum, "%s.%d",
373                                                         partnum, ++part_seq);
374                                         }
375                                         else {
376                                                 sprintf(nested_partnum, "%d",
377                                                         ++part_seq);
378                                         }
379                                         the_mime_parser(nested_partnum,
380                                                     part_start, part_end,
381                                                         CallBack,
382                                                         PreMultiPartCallBack,
383                                                         PostMultiPartCallBack,
384                                                         userdata,
385                                                         dont_decode);
386                                 }
387                                 ptr = memreadline(ptr, buf, sizeof(buf));
388                                 part_start = ptr;
389                         }
390                         else {
391                                 part_end = ptr;
392                                 ++ptr;
393                         }
394                 } while ( (strcasecmp(ptr, endary)) && (ptr <= content_end) );
395                 if (PostMultiPartCallBack != NULL) {
396                         PostMultiPartCallBack("", "", partnum, "", NULL,
397                                 content_type, 0, encoding, userdata);
398                 }
399                 return;
400         }
401
402         /* If it's not a multipart message, then do something with it */
403         if (!is_multipart) {
404                 lprintf(9, "doing non-multipart thing\n");
405                 part_start = ptr;
406                 length = 0;
407                 while (ptr < content_end) {
408                         ++ptr;
409                         ++length;
410                 }
411                 part_end = content_end;
412                 
413                 /* Truncate if the header told us to */
414                 if ( (content_length > 0) && (length > content_length) ) {
415                         length = content_length;
416                         lprintf(9, "truncated to %d\n", content_length);
417                 }
418                 
419                 mime_decode(partnum,
420                             part_start, length,
421                             content_type, encoding, disposition,
422                             name, filename,
423                             CallBack, NULL, NULL,
424                             userdata, dont_decode);
425         }
426 }
427
428
429
/*
 * Public entry point of the MIME parser.
 *
 * The caller hands in a complete entity (HEADERS + CONTENT).  content_end
 * may be NULL, in which case the message is taken to end at the first
 * 0x00 byte.  All arguments are forwarded unchanged to the_mime_parser(),
 * which is started with an empty part number.
 */
void mime_parser(char *content_start,
                char *content_end,

                 void (*CallBack)
                  (char *cbname,
                   char *cbfilename,
                   char *cbpartnum,
                   char *cbdisp,
                   void *cbcontent,
                   char *cbtype,
                   size_t cblength,
                   char *cbencoding,
                   void *cbuserdata),

                 void (*PreMultiPartCallBack)
                  (char *cbname,
                   char *cbfilename,
                   char *cbpartnum,
                   char *cbdisp,
                   void *cbcontent,
                   char *cbtype,
                   size_t cblength,
                   char *cbencoding,
                   void *cbuserdata),

                 void (*PostMultiPartCallBack)
                  (char *cbname,
                   char *cbfilename,
                   char *cbpartnum,
                   char *cbdisp,
                   void *cbcontent,
                   char *cbtype,
                   size_t cblength,
                   char *cbencoding,
                   void *cbuserdata),

                  void *userdata,
                  int dont_decode
)
{
        /* The outermost entity has no part number of its own */
        char *top_level_partnum = "";

        lprintf(9, "mime_parser() called\n");

        the_mime_parser(top_level_partnum,
                        content_start, content_end,
                        CallBack,
                        PreMultiPartCallBack,
                        PostMultiPartCallBack,
                        userdata,
                        dont_decode);
}