4 * This is the MIME parser for Citadel.
6 * Copyright (c) 1998-2006 by Art Cancro
7 * This code is distributed under the GNU General Public License v2.
15 #include <sys/types.h>
23 #include "sysdep_decls.h"
26 #include "mime_parser.h"
/*
 * extract_key() - copy the value of a "key=" parameter from a header string.
 *
 * target: output buffer.  The text following "key=" is copied into it with
 *         strcpy(), so this function applies no size limit of its own.
 *         NOTE(review): presumably callers pass a buffer at least as large
 *         as source -- confirm at the call sites.
 * source: header text that is searched for the parameter.
 * key:    parameter name without the '=' (for example "boundary").
 *
 * NOTE(review): several lines of this function are not visible in this
 * excerpt; the comments below describe only the statements shown.
 */
29 void extract_key(char *target, char *source, char *key)
32 char looking_for[256];
33 int double_quotes = 0;
/* Build the search token "<key>="; snprintf() limits it to the buffer size. */
35 snprintf(looking_for, sizeof looking_for, "%s=", key);
/*
 * Find the token in source.  NOTE(review): bmstrcasestr() is defined
 * elsewhere; by its name it is presumably a case-insensitive substring
 * search -- confirm.
 */
37 ptr = bmstrcasestr(source, looking_for);
/* Copy everything after the token into target; the scan below runs over that copy. */
42 strcpy(target, (ptr + strlen(looking_for)));
/* Examine the copied text one character at a time, up to its terminating NUL. */
44 for (ptr=target; (*ptr != 0); ++ptr) {
46 /* A semicolon means we've hit the end of the key, unless we're inside double quotes */
47 if ( (double_quotes != 1) && (*ptr == ';')) {
51 /* if we find double quotes, we've got a great set of string boundaries */
54 if (double_quotes == 1) {
/*
 * fixed_partnum() - supply a usable part number for callback functions.
 *
 * Non-multipart messages have no part number of their own, but some
 * callbacks demand one, so a missing or empty part number is reported
 * as "1".
 *
 * supplied_partnum: part number string; may be NULL or empty.
 *
 * Returns supplied_partnum itself when it is non-empty.  Otherwise returns
 * a pointer to the string literal "1", which the caller must neither
 * modify nor free.
 */
char *fixed_partnum(char *supplied_partnum) {
	/* Checking the first byte is enough to detect an empty string. */
	if (supplied_partnum == NULL || supplied_partnum[0] == '\0') {
		return "1";
	}
	return supplied_partnum;
}
78 * Given a message or message-part body and a length, handle any necessary
79 * decoding and pass the request up the stack.
/*
 * mime_decode() - hand one message part to CallBack, decoding it first when
 * its transfer encoding calls for that.
 *
 * partnum:      part number; it goes through fixed_partnum() before being
 *               given to the callback.
 * part_start:   first byte of the (possibly encoded) part body.
 * length:       number of bytes at part_start.
 * content_type, charset, name, filename:
 *               passed to the callback unchanged.
 * encoding:     transfer encoding name, compared without regard to case.
 *               This buffer is overwritten with "" when it holds "7bit",
 *               "8bit" or "binary", so the caller's copy changes too.
 * PreMultiPartCallBack, PostMultiPartCallBack:
 *               not referenced by any statement visible in this excerpt.
 *
 * disposition, CallBack, userdata and dont_decode are also used below;
 * their declarations fall on lines that are not visible in this excerpt.
 *
 * Visible behaviour:
 *   - empty encoding, or dont_decode set: CallBack receives the bytes at
 *     part_start with the original length and the encoding string.
 *   - "base64" or "quoted-printable": the body is decoded into a buffer
 *     obtained from malloc() and CallBack receives the decoded bytes, with
 *     the encoding reported as "binary".  Nothing is reported when the
 *     decoder returns zero bytes.
 *   - any other encoding: handled by the "Fail silently" branch.
 */
81 void mime_decode(char *partnum,
82 char *part_start, size_t length,
83 char *content_type, char *charset, char *encoding,
85 char *name, char *filename,
97 void (*PreMultiPartCallBack)
108 void (*PostMultiPartCallBack)
125 size_t bytes_decoded = 0;
127 /* Some encodings aren't really encodings */
/* Each match below blanks the caller's encoding buffer in place. */
128 if (!strcasecmp(encoding, "7bit"))
129 strcpy(encoding, "");
130 if (!strcasecmp(encoding, "8bit"))
131 strcpy(encoding, "");
132 if (!strcasecmp(encoding, "binary"))
133 strcpy(encoding, "");
135 /* If this part is not encoded, send as-is */
136 if ( (strlen(encoding) == 0) || (dont_decode)) {
/* A NULL CallBack is tolerated: the part is then simply not reported. */
137 if (CallBack != NULL) {
138 CallBack(name, filename, fixed_partnum(partnum),
139 disposition, part_start,
140 content_type, charset, length, encoding, userdata);
145 /* Fail silently if we hit an unknown encoding. */
/* strcasecmp() returns non-zero on mismatch, so this is true when encoding is neither of the two. */
146 if ((strcasecmp(encoding, "base64"))
147 && (strcasecmp(encoding, "quoted-printable"))) {
152 * Allocate a buffer for the decoded data. The output buffer is slightly
153 * larger than the input buffer; this assumes that the decoded data
154 * will never be significantly larger than the encoded data. This is a
155 * safe assumption with base64, uuencode, and quoted-printable.
/* The slack added on top of the encoded length is 32768 bytes. */
157 decoded = malloc(length + 32768);
158 if (decoded == NULL) {
/*
 * Pick the decoder by encoding name.  NOTE(review): CtdlDecodeBase64() and
 * CtdlDecodeQuotedPrintable() are defined elsewhere; from their use here
 * they presumably return the number of bytes written to decoded -- confirm.
 */
162 if (!strcasecmp(encoding, "base64")) {
163 bytes_decoded = CtdlDecodeBase64(decoded, part_start, length);
165 else if (!strcasecmp(encoding, "quoted-printable")) {
166 bytes_decoded = CtdlDecodeQuotedPrintable(decoded, part_start, length);
/*
 * Report the decoded data only when at least one byte was produced.  The
 * encoding argument is the literal "binary" because the data handed to the
 * callback is no longer encoded.
 */
169 if (bytes_decoded > 0) if (CallBack != NULL) {
170 CallBack(name, filename, fixed_partnum(partnum),
171 disposition, decoded,
172 content_type, charset, bytes_decoded, "binary", userdata);
179 * Break out the components of a multipart message
180 * (This function expects to be fed HEADERS + CONTENT)
181 * Note: NULL can be supplied as content_end; in this case, the message is
182 * considered to have ended when the parser encounters a 0x00 byte.
/*
 * the_mime_parser() - recursive worker that parses one MIME entity
 * (headers followed by content).
 *
 * partnum:        part number of this entity; the visible calls pass ""
 *                 for the top level and nested_partnum when recursing.
 * content_start:  first byte of the headers.
 * content_end:    end of the entity, or NULL to have it derived with
 *                 strlen(content_start).
 * PreMultiPartCallBack, PostMultiPartCallBack:
 *                 optional (may be NULL); called before and after the
 *                 sub-parts of a multipart entity or of an encapsulated
 *                 message/rfc822 part are processed.
 *
 * CallBack, userdata and dont_decode are also used below; their
 * declarations fall on lines that are not visible in this excerpt.
 *
 * Visible outline:
 *   1. Allocate and zero a set of SIZ-byte scratch buffers.
 *   2. Read the header block line by line and pick out Content-type,
 *      Content-Disposition, Content-length and Content-transfer-encoding.
 *   3. If a boundary parameter was found, treat the entity as multipart:
 *      locate each boundary and recurse into every part with a nested part
 *      number.
 *   4. Otherwise handle it as a single part, and recurse once more when the
 *      content type is "message/rfc822".
 *   5. Release the scratch buffers at the end_parser label.
 *
 * NOTE(review): many lines of this function are not visible in this
 * excerpt; the comments describe only the statements shown.
 */
184 void the_mime_parser(char *partnum,
185 char *content_start, char *content_end,
197 void (*PreMultiPartCallBack)
208 void (*PostMultiPartCallBack)
226 char *part_start, *part_end = NULL;
231 size_t startary_len = 0;
236 size_t content_length;
240 char *content_type_name;
241 char *content_disposition_name;
247 char nested_partnum[256];
249 char *evaluate_crlf_ptr = NULL;
/*
 * Scratch buffers: each one is SIZ bytes, taken from malloc() and then
 * zero-filled.  NOTE(review): in the lines visible here the malloc()
 * results are passed to memset() without a NULL check.
 */
256 boundary = malloc(SIZ);
257 memset(boundary, 0, SIZ);
259 startary = malloc(SIZ);
260 memset(startary, 0, SIZ);
262 endary = malloc(SIZ);
263 memset(endary, 0, SIZ);
265 header = malloc(SIZ);
266 memset(header, 0, SIZ);
268 content_type = malloc(SIZ);
269 memset(content_type, 0, SIZ);
271 charset = malloc(SIZ);
272 memset(charset, 0, SIZ);
274 encoding = malloc(SIZ);
275 memset(encoding, 0, SIZ);
277 content_type_name = malloc(SIZ);
278 memset(content_type_name, 0, SIZ);
280 content_disposition_name = malloc(SIZ);
281 memset(content_disposition_name, 0, SIZ);
283 filename = malloc(SIZ);
284 memset(filename, 0, SIZ);
286 disposition = malloc(SIZ);
287 memset(disposition, 0, SIZ);
289 /* If the caller didn't supply an endpointer, generate one by measure */
290 if (content_end == NULL) {
291 content_end = &content_start[strlen(content_start)];
294 /* Learn interesting things from the headers */
/*
 * NOTE(review): memreadlinelen() is defined elsewhere; from its use here it
 * presumably copies the next line into buf (at most SIZ bytes), stores its
 * length in buflen and returns the position after that line -- confirm.
 */
298 ptr = memreadlinelen(ptr, buf, SIZ, &buflen);
299 if (ptr >= content_end) {
303 for (i = 0; i < buflen; ++i) {
304 if (isspace(buf[i])) {
/*
 * A line that does not begin with whitespace is not a continuation line, so
 * the text accumulated in header so far is examined as one complete header.
 */
309 if (!isspace(buf[0])) {
310 if (!strncasecmp(header, "Content-type:", 13)) {
311 strcpy(content_type, &header[13]);
312 striplt(content_type);
313 extract_key(content_type_name, content_type, "name");
314 extract_key(charset, content_type, "charset");
/* The boundary is looked up in the full header line, not in content_type. */
315 extract_key(boundary, header, "boundary");
316 /* Deal with weird headers */
/* Cut content_type at the first space and at the first ';'. */
317 if (strchr(content_type, ' '))
318 *(strchr(content_type, ' ')) = '\0';
319 if (strchr(content_type, ';'))
320 *(strchr(content_type, ';')) = '\0';
322 if (!strncasecmp(header, "Content-Disposition:", 20)) {
323 strcpy(disposition, &header[20]);
324 striplt(disposition);
325 extract_key(content_disposition_name, disposition, "name");
326 extract_key(filename, disposition, "filename");
/*
 * The compare length (15) stops before the trailing space of the literal,
 * so the match is on the header name and colon only.
 */
328 if (!strncasecmp(header, "Content-length: ", 15)) {
330 safestrncpy(clbuf, &header[15], sizeof clbuf);
332 content_length = (size_t) atol(clbuf);
/* Same pattern: 26 characters cover the name and colon, not the space. */
334 if (!strncasecmp(header, "Content-transfer-encoding: ", 26)) {
335 strcpy(encoding, &header[26]);
/* Append the current line to header only while the total stays below SIZ. */
341 if ((headerlen + buflen + 2) < SIZ) {
342 memcpy(&header[headerlen], buf, buflen);
344 header[headerlen] = '\0';
346 } while ((!IsEmptyStr(buf)) && (*ptr != 0));
/* Drop any parameters (text from the first ';') and trim both values. */
348 if (strchr(disposition, ';'))
349 *(strchr(disposition, ';')) = '\0';
350 striplt(disposition);
351 if (strchr(content_type, ';'))
352 *(strchr(content_type, ';')) = '\0';
353 striplt(content_type);
355 if (!IsEmptyStr(boundary)) {
361 /* If this is a multipart message, then recursively process it */
365 /* Tell the client about this message's multipartedness */
366 if (PreMultiPartCallBack != NULL) {
367 PreMultiPartCallBack("", "", partnum, "",
368 NULL, content_type, charset,
369 0, encoding, userdata);
372 /* Figure out where the boundaries are */
/* startary is "--" + boundary; endary is the same with a trailing "--". */
373 snprintf(startary, SIZ, "--%s", boundary);
374 snprintf(endary, SIZ, "--%s--", boundary);
375 startary_len = strlen(startary);
379 next_boundary = NULL;
/*
 * Byte-by-byte scan for the next occurrence of startary.
 * NOTE(review): memcmp() reads startary_len bytes starting at srch; whether
 * that read can extend past content_end is not evident from the lines
 * visible here -- confirm.
 */
380 for (srch=ptr; srch<content_end; ++srch) {
381 if (!memcmp(srch, startary, startary_len)) {
382 next_boundary = srch;
387 if ( (part_start != NULL) && (next_boundary != NULL) ) {
388 part_end = next_boundary;
389 --part_end; /* omit the trailing LF */
391 --part_end; /* omit the trailing CR */
/* Build the part number for the sub-part, then recurse into it. */
394 if (!IsEmptyStr(partnum)) {
395 snprintf(nested_partnum,
396 sizeof nested_partnum,
401 snprintf(nested_partnum,
402 sizeof nested_partnum,
405 the_mime_parser(nested_partnum,
406 part_start, part_end,
408 PreMultiPartCallBack,
409 PostMultiPartCallBack,
414 if (next_boundary != NULL) {
415 /* If we pass out of scope, don't attempt to
416 * read past the end boundary. */
/*
 * NOTE(review): strcmp() compares up to the NUL terminator, so this matches
 * only when the end boundary is the last text before a NUL byte.
 */
417 if (!strcmp(next_boundary, endary)) {
421 /* Set up for the next part. */
422 part_start = strstr(next_boundary, "\n");
424 /* Determine whether newlines are LF or CRLF */
425 evaluate_crlf_ptr = part_start;
427 if (!memcmp(evaluate_crlf_ptr, "\r\n", 2)) {
434 /* Advance past the LF ... now we're in the next part */
440 /* Invalid end of multipart. Bail out! */
443 } while ( (ptr < content_end) && (next_boundary != NULL) );
445 if (PostMultiPartCallBack != NULL) {
446 PostMultiPartCallBack("", "", partnum, "", NULL,
447 content_type, charset, 0, encoding, userdata);
452 /* If it's not a multipart message, then do something with it */
456 while (ptr < content_end) {
460 part_end = content_end;
463 * I thought there was an off-by-one error here, but there isn't.
464 * This probably means that there's an off-by-one error somewhere
465 * else ... or maybe only in certain messages?
470 /* Truncate if the header told us to */
471 if ( (content_length > 0) && (length > content_length) ) {
472 length = content_length;
475 /* Sometimes the "name" field is tacked on to Content-type,
476 * and sometimes it's tacked on to Content-disposition. Use
477 * whichever one we have.
479 if (strlen(content_disposition_name) > strlen(content_type_name)) {
480 name = content_disposition_name;
483 name = content_type_name;
486 /* lprintf(CTDL_DEBUG, "mime_decode part=%s, len=%d, type=%s, charset=%s, encoding=%s\n",
487 partnum, length, content_type, charset, encoding); */
489 /* Ok, we've got a non-multipart part here, so do something with it.
493 content_type, charset, encoding, disposition,
495 CallBack, NULL, NULL,
496 userdata, dont_decode
500 * Now if it's an encapsulated message/rfc822 then we have to recurse into it
502 if (!strcasecmp(content_type, "message/rfc822")) {
504 if (PreMultiPartCallBack != NULL) {
505 PreMultiPartCallBack("", "", partnum, "",
506 NULL, content_type, charset,
507 0, encoding, userdata);
509 if (CallBack != NULL) {
510 if (strlen(partnum) > 0) {
511 snprintf(nested_partnum,
512 sizeof nested_partnum,
517 snprintf(nested_partnum,
518 sizeof nested_partnum,
521 the_mime_parser(nested_partnum,
522 part_start, part_end,
524 PreMultiPartCallBack,
525 PostMultiPartCallBack,
530 if (PostMultiPartCallBack != NULL) {
531 PostMultiPartCallBack("", "", partnum, "", NULL,
532 content_type, charset, 0, encoding, userdata);
540 end_parser: /* free the buffers! end the oppression!! */
548 free(content_type_name);
549 free(content_disposition_name);
557 * Entry point for the MIME parser.
558 * (This function expects to be fed HEADERS + CONTENT)
559 * Note: NULL can be supplied as content_end; in this case, the message is
560 * considered to have ended when the parser encounters a 0x00 byte.
/*
 * mime_parser() - entry point that starts a parse of a whole message.
 *
 * Calls the_mime_parser() with an empty part number ("") and forwards
 * content_start, content_end, both multipart callbacks, userdata and
 * dont_decode to it.  Nothing is returned.
 *
 * NOTE(review): part of the parameter list and of the call is on lines that
 * are not visible in this excerpt.
 */
562 void mime_parser(char *content_start,
577 void (*PreMultiPartCallBack)
589 void (*PostMultiPartCallBack)
/* The top-level entity has no part number, hence the empty string. */
606 the_mime_parser("", content_start, content_end,
608 PreMultiPartCallBack,
609 PostMultiPartCallBack,
610 userdata, dont_decode);