5 * \defgroup MIME This is the MIME parser for Citadel.
7 * Copyright (c) 1998-2005 by Art Cancro
8 * This code is distributed under the terms of the GNU General Public License.
9 * \ingroup WebcitHttpServer
13 #include "webserver.h"
14 #include "mime_parser.h"
18 * \param target buffer that first receives a copy of source and then the extracted value; must be large enough to hold all of source
19 * \param source header text to search for key=value
/*
 * Review notes for extract_key():
 * Copies source into target, then scans target for key (compared
 * case-insensitively) immediately followed by '='.  target therefore has
 * to be large enough for a full copy of source.
 * NOTE(review): several lines of this function are not visible in this
 * view (the local declarations, whatever guards the one-character strip,
 * the body of the final loop and the not-found path), so only the visible
 * steps are described here.
 */
22 void extract_key(char *target, char *source, char *key)
26 strcpy(target, source);
/* Try every offset of target as a possible start of "key=". */
27 for (a = 0; a < strlen(target); ++a) {
28 if ((!strncasecmp(&target[a], key, strlen(key)))
29 && (target[a + strlen(key)] == '=')) {
/* Move the text that follows the '=' to the front of target.
 * NOTE(review): source and destination overlap; strcpy() on overlapping
 * buffers is undefined behaviour, memmove() would be the safe form. */
30 strcpy(target, &target[a + strlen(key) + 1]);
/* Drop the first character of target -- presumably conditional on a
 * check that sits on a line not shown here; TODO confirm.  Same
 * overlapping-strcpy() caveat as above. */
32 strcpy(target, &target[1]);
/* Second pass over the extracted value; the loop body is not visible. */
33 for (b = 0; b < strlen(target); ++b)
44 * \brief For non-multipart messages, we need to generate a quickie partnum of "1"
45 * to return to callback functions. Some callbacks demand it.
46 * \param supplied_partnum part number to check; may be NULL or empty
47 * \return supplied_partnum if it is a non-empty string, otherwise "1"
char *fixed_partnum(char *supplied_partnum) {
	/* A usable part number is any non-NULL, non-empty string; hand it
	 * back untouched. */
	if (supplied_partnum != NULL && supplied_partnum[0] != '\0') {
		return supplied_partnum;
	}

	/* Otherwise fall back to "1", the part number callbacks expect for a
	 * message that has only one part.  This is a string constant: callers
	 * must not modify or free it. */
	return "1";
}
58 * \brief Convert "quoted-printable" to binary. Returns the number of bytes written to decoded, including the terminating NUL.
59 * \param decoded the buffer that receives the decoded output
60 * \param encoded the encoded string to decode
61 * \param sourcelen number of bytes of encoded to process
/*
 * Review notes for CtdlDecodeQuotedPrintable():
 * The input is consumed one line at a time: bytes are gathered in buf
 * until a newline or the last input byte, the line is trimmed and decoded
 * into decoded, and the result is NUL-terminated.
 * NOTE(review): the declarations of buf, buf_length, ch and i, and the
 * statements that reset per-line state, are on lines not visible in this
 * view; the size of buf and any bound on buf_length cannot be confirmed
 * from here.
 */
63 int CtdlDecodeQuotedPrintable(char *decoded, char *encoded, int sourcelen) {
66 int soft_line_break = 0;
68 int decoded_length = 0;
76 for (i = 0; i < sourcelen; ++i) {
/* Accumulate the current line, including its line terminator. */
78 buf[buf_length++] = encoded[i];
/* A line is complete at a newline or at the final input byte (a further
 * condition may sit on the line missing between these two). */
80 if ( (encoded[i] == '\n')
82 || (i == (sourcelen-1)) ) {
83 buf[buf_length++] = 0;
85 /*** begin -- process one line ***/
/* Strip one trailing LF, then one trailing CR, then any other trailing
 * whitespace.
 * NOTE(review): once buf has become empty (e.g. the line was just "\n"),
 * strlen(buf)-1 wraps around and buf[...] is read outside the buffer --
 * confirm and add a length check.  isspace() is also handed a plain char. */
87 if (buf[strlen(buf)-1] == '\n') {
88 buf[strlen(buf)-1] = 0;
90 if (buf[strlen(buf)-1] == '\r') {
91 buf[strlen(buf)-1] = 0;
93 while (isspace(buf[strlen(buf)-1])) {
94 buf[strlen(buf)-1] = 0;
/* Consume buf from the front until nothing is left. */
98 while (strlen(buf) > 0) {
/* A lone "=" at the end of a line (the handling is on lines not shown;
 * presumably it marks a soft line break -- TODO confirm). */
99 if (!strcmp(buf, "=")) {
/* "=XX": parse two hex digits into one output byte and drop the three
 * input characters.  The sscanf() result is not checked. */
102 } else if ((strlen(buf)>=3) && (buf[0]=='=')) {
103 sscanf(&buf[1], "%02x", &ch);
104 decoded[decoded_length++] = ch;
/* NOTE(review): overlapping strcpy() is undefined behaviour; memmove()
 * would be the safe form (applies to the one-character shift below too). */
105 strcpy(buf, &buf[3]);
/* Any other character is copied through unchanged. */
107 decoded[decoded_length++] = buf[0];
108 strcpy(buf, &buf[1]);
/* Unless the line ended in a soft break, terminate it with CRLF in the
 * output -- so LF-only input lines grow by one byte each. */
111 if (soft_line_break == 0) {
112 decoded[decoded_length++] = '\r';
113 decoded[decoded_length++] = '\n';
116 /*** end -- process one line ***/
/* NUL-terminate; the terminator is counted in the returned length. */
120 decoded[decoded_length++] = 0;
121 return(decoded_length);
125 * \brief fully decode a message
126 * Given a message or message-part body and a length, handle any necessary
127 * decoding and pass the request up the stack.
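128 * \param partnum part number of this part; NULL or "" is reported to CallBack as "1"
129 * \param part_start start of the part's (possibly encoded) body
131 * \param content_type MIME content type, passed through to CallBack
132 * \param charset character set, passed through to CallBack
133 * \param encoding transfer encoding; "7bit", "8bit" and "binary" are treated as unencoded (the buffer is overwritten with ""), "base64" and "quoted-printable" are decoded
134 * \param disposition content disposition, passed through to CallBack
136 * \param filename file name, passed through to CallBack
137 * \param CallBack function called with the part's data; may be NULL
138 * \param PreMultiPartCallBack todo
139 * \param PostMultiPartCallBack todo
140 * \param userdata opaque pointer passed through to CallBack
141 * \param dont_decode if nonzero, hand the part to CallBack without decoding it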
/*
 * Review notes for mime_decode():
 * Hands one message part to CallBack, either as-is (no encoding, or
 * dont_decode set) or after base64 / quoted-printable decoding into a
 * temporary buffer.
 * NOTE(review): parts of the parameter list, the local declarations and
 * the bodies of several branches are on lines not visible in this view.
 */
143 void mime_decode(char *partnum,
144 char *part_start, size_t length,
145 char *content_type, char *charset, char *encoding,
147 char *name, char *filename,
159 void (*PreMultiPartCallBack)
170 void (*PostMultiPartCallBack)
187 size_t bytes_decoded = 0;
189 /* Some encodings aren't really encodings */
/* Note that this overwrites the caller's encoding buffer in place. */
190 if (!strcasecmp(encoding, "7bit"))
191 strcpy(encoding, "");
192 if (!strcasecmp(encoding, "8bit"))
193 strcpy(encoding, "");
194 if (!strcasecmp(encoding, "binary"))
195 strcpy(encoding, "");
197 /* If this part is not encoded, send as-is */
198 if ( (strlen(encoding) == 0) || (dont_decode)) {
199 if (CallBack != NULL) {
/* fixed_partnum() substitutes "1" for a NULL or empty part number. */
200 CallBack(name, filename, fixed_partnum(partnum),
201 disposition, part_start,
202 content_type, charset, length, encoding, userdata);
/* True when the encoding is neither base64 nor quoted-printable; the
 * action taken is on lines not shown. */
207 if ((strcasecmp(encoding, "base64"))
208 && (strcasecmp(encoding, "quoted-printable"))) {
212 * Allocate a buffer for the decoded data. The output buffer is the
213 * same size as the input buffer; this assumes that the decoded data
214 * will never be larger than the encoded data. This is a safe
215 * assumption with base64, uuencode, and quoted-printable.
217 decoded = malloc(length+2048);
/* NOTE(review): the buffer is length+2048 bytes, not "the same size" as
 * the comment above says.  CtdlDecodeQuotedPrintable() writes CRLF after
 * every line that is not a soft break plus a trailing NUL, so its output
 * can be larger than LF-only input; the 2048-byte slack is the only
 * margin -- confirm this is sufficient for untrusted input. */
218 if (decoded == NULL) {
222 if (!strcasecmp(encoding, "base64")) {
223 bytes_decoded = CtdlDecodeBase64(decoded, part_start, length);
225 else if (!strcasecmp(encoding, "quoted-printable")) {
226 bytes_decoded = CtdlDecodeQuotedPrintable(decoded,
/* Decoded data is reported to the callback with encoding "binary";
 * nothing is reported when zero bytes were decoded. */
230 if (bytes_decoded > 0) if (CallBack != NULL) {
231 CallBack(name, filename, fixed_partnum(partnum),
232 disposition, decoded,
233 content_type, charset, bytes_decoded, "binary", userdata);
240 * \brief Break out the components of a multipart message
241 * (This function expects to be fed HEADERS + CONTENT)
242 * Note: NULL can be supplied as content_end; in this case, the message is
243 * considered to have ended when the parser encounters a 0x00 byte.
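244 * \param partnum part number of this level ("" at the top level); used to build the numbers of nested parts
245 * \param content_start start of the headers + content to parse
246 * \param content_end end of the data, or NULL to stop at the first 0x00 byte
247 * \param CallBack callback handed to the decode step for each non-multipart part
248 * \param PreMultiPartCallBack called before the parts of a multipart or message/rfc822 section are processed; may be NULL
249 * \param PostMultiPartCallBack called after those parts have been processed; may be NULL
250 * \param userdata opaque pointer passed through to the callbacks
251 * \param dont_decode decode flag passed on unchanged together with userdata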
/*
 * Review notes for the_mime_parser():
 * Reads the headers at content_start, then either splits a multipart body
 * at its boundary lines and recurses into each part, or passes a
 * non-multipart body on for decoding (recursing once more for
 * message/rfc822).
 * NOTE(review): a large share of this function's lines (parts of the
 * parameter list, most local declarations, loop openers, branch bodies and
 * most of the cleanup) is not visible in this view; the notes below only
 * cover what the visible lines establish.
 */
253 void the_mime_parser(char *partnum,
254 char *content_start, char *content_end,
266 void (*PreMultiPartCallBack)
277 void (*PostMultiPartCallBack)
295 char *part_start, *part_end = NULL;
300 size_t startary_len = 0;
305 size_t content_length;
309 char *content_type_name;
310 char *content_disposition_name;
316 char nested_partnum[SIZ];
/* Work buffers: SIZ bytes each, zero-filled.
 * NOTE(review): on the lines visible here every malloc() result goes
 * straight into memset() without a NULL check. */
321 boundary = malloc(SIZ);
322 memset(boundary, 0, SIZ);
324 startary = malloc(SIZ);
325 memset(startary, 0, SIZ);
327 endary = malloc(SIZ);
328 memset(endary, 0, SIZ);
330 header = malloc(SIZ);
331 memset(header, 0, SIZ);
333 content_type = malloc(SIZ);
334 memset(content_type, 0, SIZ);
336 charset = malloc(SIZ);
337 memset(charset, 0, SIZ);
339 encoding = malloc(SIZ);
340 memset(encoding, 0, SIZ);
342 content_type_name = malloc(SIZ);
343 memset(content_type_name, 0, SIZ);
345 content_disposition_name = malloc(SIZ);
346 memset(content_disposition_name, 0, SIZ);
348 filename = malloc(SIZ);
349 memset(filename, 0, SIZ);
351 disposition = malloc(SIZ);
352 memset(disposition, 0, SIZ);
354 /** If the caller didn't supply an endpointer, generate one by measure */
355 if (content_end == NULL) {
356 content_end = &content_start[strlen(content_start)];
359 /** Learn interesting things from the headers */
/* Fetch the next line into buf.  memreadline() is defined elsewhere --
 * presumably it returns the position just past the line; verify. */
362 ptr = memreadline(ptr, buf, SIZ);
363 if (ptr >= content_end) {
367 for (i = 0; i < strlen(buf); ++i) {
368 if (isspace(buf[i])) {
/* A line that does not start with whitespace begins a new header, so the
 * header accumulated so far is examined now.  Header names are matched
 * case-insensitively by prefix; 14, 21, 16 and 27 are the lengths of the
 * respective prefix strings. */
373 if (!isspace(buf[0])) {
374 if (!strncasecmp(header, "Content-type: ", 14)) {
375 strcpy(content_type, &header[14]);
376 extract_key(content_type_name, content_type, "name");
377 extract_key(charset, content_type, "charset");
378 /** Deal with weird headers */
379 if (strchr(content_type, ' '))
380 *(strchr(content_type, ' ')) = '\0';
381 if (strchr(content_type, ';'))
382 *(strchr(content_type, ';')) = '\0';
384 if (!strncasecmp(header, "Content-Disposition: ", 21)) {
385 strcpy(disposition, &header[21]);
386 extract_key(content_disposition_name, disposition, "name");
387 extract_key(filename, disposition, "filename");
389 if (!strncasecmp(header, "Content-length: ", 16)) {
/* NOTE(review): atol() cannot report malformed input, and a negative
 * value becomes a very large size_t after the cast. */
390 content_length = (size_t) atol(&header[16]);
392 if (!strncasecmp(header,
393 "Content-transfer-encoding: ", 27))
394 strcpy(encoding, &header[27]);
/* Look for a boundary= parameter only while none has been stored yet. */
395 if (strlen(boundary) == 0)
396 extract_key(boundary, header, "boundary");
/* Size check against SIZ -- presumably guarding the append of buf to
 * header on the line not shown; TODO confirm. */
399 if ((strlen(header) + strlen(buf) + 2) < SIZ)
/* Header reading stops at an empty line or at a NUL byte. */
401 } while ((strlen(buf) > 0) && (*ptr != 0));
/* Cut disposition and content_type at their first ';' and pass them
 * through striplt() (defined elsewhere). */
403 if (strchr(disposition, ';'))
404 *(strchr(disposition, ';')) = '\0';
405 striplt(disposition);
406 if (strchr(content_type, ';'))
407 *(strchr(content_type, ';')) = '\0';
408 striplt(content_type);
410 if (strlen(boundary) > 0) {
416 /** If this is a multipart message, then recursively process it */
420 /** Tell the client about this message's multipartedness */
421 if (PreMultiPartCallBack != NULL) {
422 PreMultiPartCallBack("", "", partnum, "",
423 NULL, content_type, charset,
424 0, encoding, userdata);
427 /** Figure out where the boundaries are */
428 snprintf(startary, SIZ, "--%s", boundary);
429 snprintf(endary, SIZ, "--%s--", boundary);
430 startary_len = strlen(startary);
434 next_boundary = NULL;
/* Byte-by-byte search for the next occurrence of the start boundary.
 * NOTE(review): memcmp() reads startary_len bytes beginning at srch, which
 * can reach past content_end for the last few positions -- confirm the
 * underlying buffer makes that safe. */
435 for (srch=ptr; srch<content_end; ++srch) {
436 if (!memcmp(srch, startary, startary_len)) {
437 next_boundary = srch;
/* With both a part start and a following boundary known, the part ends
 * where that boundary begins. */
442 if ( (part_start != NULL) && (next_boundary != NULL) ) {
443 part_end = next_boundary;
/* Build the part number for the nested part; the format differs depending
 * on whether partnum is empty (format strings are on lines not shown). */
446 if (strlen(partnum) > 0) {
447 snprintf(nested_partnum,
448 sizeof nested_partnum,
453 snprintf(nested_partnum,
454 sizeof nested_partnum,
/* Recurse into the part just delimited. */
457 the_mime_parser(nested_partnum,
458 part_start, part_end,
460 PreMultiPartCallBack,
461 PostMultiPartCallBack,
/* NOTE(review): the strcmp() a few lines below compares endary against
 * everything from next_boundary up to the terminating NUL, so it can only
 * match when the closing boundary is the very last text in the buffer. */
466 if (next_boundary != NULL) {
468 * If we pass out of scope, don't attempt to
469 * read past the end boundary. */
470 if (!strcmp(next_boundary, endary)) {
474 /** Set up for the next part. */
/* strstr() yields the first newline after the boundary, or NULL if there
 * is none. */
475 part_start = strstr(next_boundary, "\n");
481 /** Invalid end of multipart. Bail out! */
484 } while ( (ptr < content_end) && (next_boundary != NULL) );
486 if (PostMultiPartCallBack != NULL) {
487 PostMultiPartCallBack("", "", partnum, "", NULL,
488 content_type, charset, 0, encoding, userdata);
493 /** If it's not a multipart message, then do something with it */
497 while (ptr < content_end) {
/* A non-multipart body extends to content_end. */
501 part_end = content_end;
502 /** fix an off-by-one error */
506 /** Truncate if the header told us to */
/* NOTE(review): content_length is declared without an initializer above
 * and, on the visible lines, is only assigned when a Content-length header
 * is seen -- confirm it is zeroed on a line not shown. */
507 if ( (content_length > 0) && (length > content_length) ) {
508 length = content_length;
512 * Sometimes the "name" field is tacked on to Content-type,
513 * and sometimes it's tacked on to Content-disposition. Use
514 * whichever one we have.
516 if (strlen(content_disposition_name) > strlen(content_type_name)) {
517 name = content_disposition_name;
520 name = content_type_name;
/* Debug trace via lprintf() (defined elsewhere).
 * NOTE(review): len=%d assumes length is an int; check its declared type. */
524 lprintf(9, "mime_decode part=%s, len=%d, type=%s, charset=%s, encoding=%s\n",
525 partnum, length, content_type, charset, encoding);
529 * Ok, we've got a non-multipart part here, so do something with it.
533 content_type, charset, encoding, disposition,
535 CallBack, NULL, NULL,
536 userdata, dont_decode
540 * Now if it's an encapsulated message/rfc822 then we have to recurse into it
542 if (!strcasecmp(content_type, "message/rfc822")) {
544 if (PreMultiPartCallBack != NULL) {
545 PreMultiPartCallBack("", "", partnum, "",
546 NULL, content_type, charset,
547 0, encoding, userdata);
/* The nested parse only happens when a part callback was supplied. */
549 if (CallBack != NULL) {
550 if (strlen(partnum) > 0) {
551 snprintf(nested_partnum,
552 sizeof nested_partnum,
557 snprintf(nested_partnum,
558 sizeof nested_partnum,
561 the_mime_parser(nested_partnum,
562 part_start, part_end,
564 PreMultiPartCallBack,
565 PostMultiPartCallBack,
570 if (PostMultiPartCallBack != NULL) {
571 PostMultiPartCallBack("", "", partnum, "", NULL,
572 content_type, charset, 0, encoding, userdata);
/* Common exit label: releases the work buffers allocated at the top (only
 * two of the free() calls are visible in this view). */
580 end_parser: /** free the buffers! end the oppression!! */
588 free(content_type_name);
589 free(content_disposition_name);
597 * \brief Entry point for the MIME parser.
598 * (This function expects to be fed HEADERS + CONTENT)
599 * Note: NULL can be supplied as content_end; in this case, the message is
600 * considered to have ended when the parser encounters a 0x00 byte.
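601 * \param content_start start of the headers + content to parse
602 * \param content_end end of the data, or NULL to stop at the first 0x00 byte
603 * \param CallBack todo
604 * \param PreMultiPartCallBack passed through to the_mime_parser
605 * \param PostMultiPartCallBack passed through to the_mime_parser
606 * \param userdata opaque pointer passed through to the_mime_parser
607 * \param dont_decode decode flag passed through to the_mime_parser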
/*
 * Review notes for mime_parser():
 * Thin wrapper that starts the recursive parse by calling
 * the_mime_parser() and forwarding its arguments.
 * NOTE(review): most of the parameter list is on lines not visible in
 * this view.
 */
609 void mime_parser(char *content_start,
624 void (*PreMultiPartCallBack)
636 void (*PostMultiPartCallBack)
/* The empty string is the part number used for the top level. */
653 the_mime_parser("", content_start, content_end,
655 PreMultiPartCallBack,
656 PostMultiPartCallBack,
657 userdata, dont_decode);