]> code.citadel.org Git - citadel.git/blob - webcit/mime_parser.c
f6a3e79f3e257d5567f9941024bb1a7c2cc938d0
[citadel.git] / webcit / mime_parser.c
1 /*
2  * $Id$
3  *
4  * This is the MIME parser for Citadel.  Sometimes it actually works.
5  *
6  * Copyright (c) 1998-2001 by Art Cancro
7  * This code is distributed under the terms of the GNU General Public License.
8  *
9  */
10
#include <stdlib.h>
#include <unistd.h>
#include <stdio.h>
#include <signal.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <ctype.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <poll.h>
#include <pthread.h>
#include "webcit.h"
#include "mime_parser.h"
23
24
/*
 * Extract the value of a "key=value" parameter from a header string.
 *
 * target  receives the value; it must be at least as large as source,
 *         because source is first copied into it in full.
 * source  the header text to search (not modified).
 * key     the parameter name, matched case-insensitively; it must be
 *         followed immediately by '='.
 *
 * One leading double quote is dropped and the value ends at the next double
 * quote.  An unquoted value runs to the end of source.  The key is matched
 * at any offset, so "name" also matches inside "filename".  If the key is
 * not found, target is set to the empty string.
 */
void extract_key(char *target, char *source, char *key)
{
        size_t keylen = strlen(key);
        char *scan;
        char *value;
        char *quote;

        if (target != source)
                strcpy(target, source);

        for (scan = target; *scan != 0; ++scan) {
                if ((strncasecmp(scan, key, keylen) != 0)
                    || (scan[keylen] != '='))
                        continue;

                value = scan + keylen + 1;
                if (*value == '"')
                        ++value;

                /* value lies inside target, so the regions overlap:
                 * memmove() is required here, strcpy() is undefined. */
                memmove(target, value, strlen(value) + 1);

                quote = strchr(target, '"');
                if (quote != NULL)
                        *quote = 0;
                return;
        }
        target[0] = 0;
}
44
45
/*
 * Callbacks always expect a part number.  A non-multipart message has none,
 * so substitute "1" whenever the supplied part number is NULL or empty;
 * otherwise hand back the caller's own pointer unchanged.
 */
char *fixed_partnum(char *supplied_partnum) {
        int usable = (supplied_partnum != NULL) && (supplied_partnum[0] != 0);

        return usable ? supplied_partnum : "1";
}
55
56
/*
 * Given a message or message-part body and a length, handle any necessary
 * decoding and pass the request up the stack.
 *
 * partnum       part number for the callback; NULL or "" is reported as "1".
 * part_start    first byte of the (possibly encoded) body.
 * length        number of bytes at part_start.
 * content_type, disposition, name, filename
 *               passed through to the callback untouched.
 * encoding      Content-transfer-encoding.  "7bit", "8bit" and "binary" are
 *               rewritten IN PLACE to "", so this must be a writable buffer.
 * CallBack      invoked once with the body (may be NULL).  For an unencoded
 *               part, or when dont_decode is nonzero, it receives part_start
 *               as-is.  For base64 / quoted-printable it receives the decoded
 *               bytes with encoding "binary", and only if at least one byte
 *               was decoded.  The decoded buffer is freed when it returns.
 * PreMultiPartCallBack, PostMultiPartCallBack
 *               accepted for signature symmetry; not used here.
 * userdata      opaque pointer handed to the callback.
 * dont_decode   nonzero to skip decoding and deliver the raw body.
 *
 * Decoding is done by an external helper (./base64 -d or ./qpdecode, relative
 * to the current directory) connected through two pipes.  Both pipes are
 * serviced with poll() so that neither side can fill up and stall the other.
 *
 * NOTE(review): writing to a decoder that has already exited raises SIGPIPE;
 * this code assumes the application ignores or handles that signal.
 */
void mime_decode(char *partnum,
                 char *part_start, size_t length,
                 char *content_type, char *encoding,
                 char *disposition,
                 char *name, char *filename,
                 void (*CallBack)
                  (char *cbname,
                   char *cbfilename,
                   char *cbpartnum,
                   char *cbdisp,
                   void *cbcontent,
                   char *cbtype,
                   size_t cblength,
                   char *cbencoding,
                   void *cbuserdata),
                 void (*PreMultiPartCallBack)
                  (char *cbname,
                   char *cbfilename,
                   char *cbpartnum,
                   char *cbdisp,
                   void *cbcontent,
                   char *cbtype,
                   size_t cblength,
                   char *cbencoding,
                   void *cbuserdata),
                 void (*PostMultiPartCallBack)
                  (char *cbname,
                   char *cbfilename,
                   char *cbpartnum,
                   char *cbdisp,
                   void *cbcontent,
                   char *cbtype,
                   size_t cblength,
                   char *cbencoding,
                   void *cbuserdata),
                  void *userdata,
                  int dont_decode
)
{

        char *decoded;
        size_t decoded_size;
        int sendpipe[2];
        int recvpipe[2];
        pid_t childpid;
        struct pollfd fds[2];
        size_t bytes_sent = 0;
        size_t bytes_recv = 0;
        size_t blocksize;
        ssize_t n;
        int flags;
        int write_error = 0;

        fprintf(stderr, "mime_decode() called\n");

        /* Some encodings aren't really encodings */
        if ((!strcasecmp(encoding, "7bit"))
            || (!strcasecmp(encoding, "8bit"))
            || (!strcasecmp(encoding, "binary")))
                encoding[0] = 0;

        /* If this part is not encoded, send as-is */
        if ((encoding[0] == 0) || (dont_decode)) {
                if (CallBack != NULL) {
                        CallBack(name, filename, fixed_partnum(partnum),
                                disposition, part_start,
                                content_type, length, encoding, userdata);
                }
                return;
        }
        if ((strcasecmp(encoding, "base64"))
            && (strcasecmp(encoding, "quoted-printable"))) {
                fprintf(stderr, "ERROR: unknown MIME encoding '%s'\n", encoding);
                return;
        }

        /*
         * Allocate a buffer for the decoded data.  base64 and
         * quoted-printable never expand on decoding, so input size plus a
         * little padding is enough.  The read loop below still enforces the
         * limit rather than trusting the decoder.
         */
        if (length > ((size_t) -1) - 1024) {
                fprintf(stderr, "ERROR: cannot allocate memory.\n");
                return;
        }
        decoded_size = length + 1024;
        decoded = malloc(decoded_size);
        if (decoded == NULL) {
                fprintf(stderr, "ERROR: cannot allocate memory.\n");
                return;
        }
        if (pipe(sendpipe) != 0) {
                free(decoded);
                return;
        }
        if (pipe(recvpipe) != 0) {
                close(sendpipe[0]);
                close(sendpipe[1]);
                free(decoded);
                return;
        }

        childpid = fork();
        if (childpid < 0) {
                close(sendpipe[0]);
                close(sendpipe[1]);
                close(recvpipe[0]);
                close(recvpipe[1]);
                free(decoded);
                return;
        }
        if (childpid == 0) {
                /* Child: send stdio to the pipes.  stderr stays open so the
                 * diagnostics below can actually be seen. */
                if (dup2(sendpipe[0], 0) < 0)
                        fprintf(stderr, "ERROR dup2()\n");
                if (dup2(recvpipe[1], 1) < 0)
                        fprintf(stderr, "ERROR dup2()\n");
                close(sendpipe[0]);
                close(sendpipe[1]);
                close(recvpipe[0]);
                close(recvpipe[1]);
                if (!strcasecmp(encoding, "base64"))
                        execlp("./base64", "base64", "-d", NULL);
                else if (!strcasecmp(encoding, "quoted-printable"))
                        execlp("./qpdecode", "qpdecode", NULL);
                fprintf(stderr, "ERROR: cannot exec decoder for %s\n", encoding);
                /* _exit(): do not run the parent's atexit handlers or flush
                 * its stdio buffers a second time from the child. */
                _exit(1);
        }

        /* Parent: close the ends we're not using */
        close(sendpipe[0]);
        close(recvpipe[1]);

        /* A non-blocking write end lets us return to reading whenever the
         * decoder is not ready to accept more input. */
        flags = fcntl(sendpipe[1], F_GETFL, 0);
        if (flags >= 0)
                fcntl(sendpipe[1], F_SETFL, flags | O_NONBLOCK);

        fds[0].fd = recvpipe[0];
        fds[0].events = POLLIN;
        fds[0].revents = 0;
        fds[1].fd = sendpipe[1];
        fds[1].events = POLLOUT;
        fds[1].revents = 0;

        if (length == 0) {
                close(sendpipe[1]);
                fds[1].fd = -1;         /* poll() ignores negative fds */
        }

        /* Run until the decoder closes its output (EOF) or we hit an error */
        while (fds[0].fd >= 0) {
                if (poll(fds, 2, -1) < 0) {
                        if (errno == EINTR)
                                continue;
                        fprintf(stderr, "ERROR: cannot poll pipes: %s\n",
                                strerror(errno));
                        break;
                }

                /* Collect decoder output FIRST */
                if (fds[0].revents != 0) {
                        if (bytes_recv >= decoded_size) {
                                fprintf(stderr,
                                        "ERROR: decoded data exceeds buffer\n");
                                break;
                        }
                        n = read(recvpipe[0], &decoded[bytes_recv],
                                 decoded_size - bytes_recv);
                        if (n > 0) {
                                bytes_recv = bytes_recv + (size_t) n;
                        } else if (n == 0) {
                                close(recvpipe[0]);
                                fds[0].fd = -1;
                        } else if ((errno != EINTR) && (errno != EAGAIN)) {
                                fprintf(stderr, "ERROR: cannot read from pipe\n");
                                break;
                        }
                }

                /* Then feed the decoder some more input */
                if ((fds[1].fd >= 0) && (fds[1].revents != 0)) {
                        blocksize = length - bytes_sent;
                        if (blocksize > 2048)
                                blocksize = 2048;
                        n = write(sendpipe[1], &part_start[bytes_sent],
                                  blocksize);
                        if (n > 0) {
                                /* count only what was really written */
                                bytes_sent = bytes_sent + (size_t) n;
                        } else if ((n < 0) && (errno != EINTR)
                                   && (errno != EAGAIN)) {
                                fprintf(stderr, "ERROR: cannot write to pipe: %s\n",
                                        strerror(errno));
                                write_error = 1;
                        }
                        if ((bytes_sent >= length) || (write_error)) {
                                /* EOF tells the decoder to finish up */
                                close(sendpipe[1]);
                                fds[1].fd = -1;
                        }
                }
        }

        if (fds[1].fd >= 0)
                close(sendpipe[1]);
        if (fds[0].fd >= 0)
                close(recvpipe[0]);

        /* Reap the decoder so it does not linger as a zombie */
        while ((waitpid(childpid, NULL, 0) < 0) && (errno == EINTR))
                ;

        if ((bytes_recv > 0) && (CallBack != NULL)) {
                CallBack(name, filename, fixed_partnum(partnum),
                        disposition, decoded,
                        content_type, bytes_recv, "binary", userdata);
        }

        free(decoded);
}
212
213 /*
214  * Break out the components of a multipart message
215  * (This function expects to be fed HEADERS + CONTENT)
216  * Note: NULL can be supplied as content_end; in this case, the message is
217  * considered to have ended when the parser encounters a 0x00 byte.
218  */
219 void the_mime_parser(char *partnum,
220                      char *content_start, char *content_end,
221                      void (*CallBack)
222                       (char *cbname,
223                        char *cbfilename,
224                        char *cbpartnum,
225                        char *cbdisp,
226                        void *cbcontent,
227                        char *cbtype,
228                        size_t cblength,
229                        char *cbencoding,
230                        void *cbuserdata),
231                      void (*PreMultiPartCallBack)
232                       (char *cbname,
233                        char *cbfilename,
234                        char *cbpartnum,
235                        char *cbdisp,
236                        void *cbcontent,
237                        char *cbtype,
238                        size_t cblength,
239                        char *cbencoding,
240                        void *cbuserdata),
241                      void (*PostMultiPartCallBack)
242                       (char *cbname,
243                        char *cbfilename,
244                        char *cbpartnum,
245                        char *cbdisp,
246                        void *cbcontent,
247                        char *cbtype,
248                        size_t cblength,
249                        char *cbencoding,
250                        void *cbuserdata),
251                       void *userdata,
252                       int dont_decode
253 )
254 {
255
256         char *ptr;
257         char *part_start, *part_end = NULL;
258         char buf[SIZ];
259         char header[SIZ];
260         char boundary[SIZ];
261         char startary[SIZ];
262         char endary[SIZ];
263         char content_type[SIZ];
264         size_t content_length;
265         char encoding[SIZ];
266         char disposition[SIZ];
267         char name[SIZ];
268         char filename[SIZ];
269         int is_multipart;
270         int part_seq = 0;
271         int i;
272         size_t length;
273         char nested_partnum[SIZ];
274
275         fprintf(stderr, "the_mime_parser() called\n");
276         ptr = content_start;
277         memset(boundary, 0, sizeof boundary);
278         memset(content_type, 0, sizeof content_type);
279         memset(encoding, 0, sizeof encoding);
280         memset(name, 0, sizeof name);
281         memset(filename, 0, sizeof filename);
282         memset(disposition, 0, sizeof disposition);
283         content_length = 0;
284
285         /* If the caller didn't supply an endpointer, generate one by measure */
286         if (content_end == NULL) {
287                 content_end = &content_start[strlen(content_start)];
288         }
289
290         /* Learn interesting things from the headers */
291         strcpy(header, "");
292         do {
293                 ptr = memreadline(ptr, buf, sizeof buf);
294                 if (ptr >= content_end)
295                         return;
296
297                 for (i = 0; i < strlen(buf); ++i)
298                         if (isspace(buf[i]))
299                                 buf[i] = ' ';
300                 if (!isspace(buf[0])) {
301                         if (!strncasecmp(header, "Content-type: ", 14)) {
302                                 strcpy(content_type, &header[14]);
303                                 extract_key(name, content_type, "name");
304                         }
305                         if (!strncasecmp(header, "Content-Disposition: ", 21)) {
306                                 strcpy(disposition, &header[21]);
307                                 extract_key(filename, disposition, "filename");
308                         }
309                         if (!strncasecmp(header, "Content-length: ", 16)) {
310                                 content_length = (size_t) atol(&header[16]);
311                         }
312                         if (!strncasecmp(header,
313                                       "Content-transfer-encoding: ", 27))
314                                 strcpy(encoding, &header[27]);
315                         if (strlen(boundary) == 0)
316                                 extract_key(boundary, header, "boundary");
317                         strcpy(header, "");
318                 }
319                 if ((strlen(header) + strlen(buf) + 2) < sizeof(header))
320                         strcat(header, buf);
321         } while ((strlen(buf) > 0) && (*ptr != 0));
322
323         for (i = 0; i < strlen(disposition); ++i)
324                 if (disposition[i] == ';')
325                         disposition[i] = 0;
326         while (isspace(disposition[0]))
327                 strcpy(disposition, &disposition[1]);
328         for (i = 0; i < strlen(content_type); ++i)
329                 if (content_type[i] == ';')
330                         content_type[i] = 0;
331         while (isspace(content_type[0]))
332                 strcpy(content_type, &content_type[1]);
333
334         if (strlen(boundary) > 0) {
335                 is_multipart = 1;
336         } else {
337                 is_multipart = 0;
338         }
339
340         fprintf(stderr, "is_multipart=%d, boundary=<%s>\n",
341                 is_multipart, boundary);
342
343         /* If this is a multipart message, then recursively process it */
344         part_start = NULL;
345         if (is_multipart) {
346
347                 /* Tell the client about this message's multipartedness */
348                 if (PreMultiPartCallBack != NULL) {
349                         PreMultiPartCallBack("", "", partnum, "",
350                                 NULL, content_type,
351                                 0, encoding, userdata);
352                 }
353
354                 /* Figure out where the boundaries are */
355                 sprintf(startary, "--%s", boundary);
356                 sprintf(endary, "--%s--", boundary);
357                 do {
358                         /* if (ptr >= content_end) goto END_MULTI; */
359
360                         if ( (!strncasecmp(ptr, startary, strlen(startary)))
361                            || (!strncasecmp(ptr, endary, strlen(endary))) ) {
362                                 fprintf(stderr, "hit boundary!\n");
363                                 if (part_start != NULL) {
364                                         if (strlen(partnum) > 0) {
365                                                 sprintf(nested_partnum, "%s.%d",
366                                                         partnum, ++part_seq);
367                                         }
368                                         else {
369                                                 sprintf(nested_partnum, "%d",
370                                                         ++part_seq);
371                                         }
372                                         the_mime_parser(nested_partnum,
373                                                     part_start, part_end,
374                                                         CallBack,
375                                                         PreMultiPartCallBack,
376                                                         PostMultiPartCallBack,
377                                                         userdata,
378                                                         dont_decode);
379                                 }
380                                 ptr = memreadline(ptr, buf, sizeof(buf));
381                                 part_start = ptr;
382                         }
383                         else {
384                                 part_end = ptr;
385                                 ++ptr;
386                         }
387                 } while ( (strcasecmp(ptr, endary)) && (ptr <= content_end) );
388                 if (PostMultiPartCallBack != NULL) {
389                         PostMultiPartCallBack("", "", partnum, "", NULL,
390                                 content_type, 0, encoding, userdata);
391                 }
392                 return;
393         }
394
395         /* If it's not a multipart message, then do something with it */
396         if (!is_multipart) {
397                 fprintf(stderr, "doing non-multipart thing\n");
398                 part_start = ptr;
399                 length = 0;
400                 while (ptr < content_end) {
401                         ++ptr;
402                         ++length;
403                 }
404                 part_end = content_end;
405                 
406                 /* Truncate if the header told us to */
407                 if ( (content_length > 0) && (length > content_length) ) {
408                         length = content_length;
409                         fprintf(stderr, "truncated to %d\n", content_length);
410                 }
411                 
412                 mime_decode(partnum,
413                             part_start, length,
414                             content_type, encoding, disposition,
415                             name, filename,
416                             CallBack, NULL, NULL,
417                             userdata, dont_decode);
418         }
419 }
420
421
422
/*
 * Public entry point of the MIME parser.
 *
 * Hands the complete message (HEADERS + CONTENT) to the_mime_parser() with
 * an empty part number, marking it as the top-level entity.  All other
 * arguments are forwarded unchanged.  content_end may be NULL, in which case
 * the message is taken to end at the first 0x00 byte.
 */
void mime_parser(char *content_start,
                char *content_end,

                 void (*CallBack)
                  (char *cbname,
                   char *cbfilename,
                   char *cbpartnum,
                   char *cbdisp,
                   void *cbcontent,
                   char *cbtype,
                   size_t cblength,
                   char *cbencoding,
                   void *cbuserdata),

                 void (*PreMultiPartCallBack)
                  (char *cbname,
                   char *cbfilename,
                   char *cbpartnum,
                   char *cbdisp,
                   void *cbcontent,
                   char *cbtype,
                   size_t cblength,
                   char *cbencoding,
                   void *cbuserdata),

                 void (*PostMultiPartCallBack)
                  (char *cbname,
                   char *cbfilename,
                   char *cbpartnum,
                   char *cbdisp,
                   void *cbcontent,
                   char *cbtype,
                   size_t cblength,
                   char *cbencoding,
                   void *cbuserdata),

                  void *userdata,
                  int dont_decode
)
{
        /* The outermost entity carries no part number of its own */
        char *toplevel_partnum = "";

        fprintf(stderr, "mime_parser() called\n");

        the_mime_parser(toplevel_partnum,
                        content_start, content_end,
                        CallBack, PreMultiPartCallBack, PostMultiPartCallBack,
                        userdata, dont_decode);
}