5 * \defgroup MIME This is the MIME parser for Citadel.
7 * Copyright (c) 1998-2005 by Art Cancro
8 * This code is distributed under the terms of the GNU General Public License.
9 * \ingroup WebcitHttpServer
13 #include "webserver.h"
14 #include "mime_parser.h"
/*
 * extract_key() - copy the value of a "key=value" parameter out of a header.
 *
 * target  receives the text that follows "<key>=" in source
 * source  header text that is searched
 * key     parameter name, without the trailing equals sign
 *
 * NOTE(review): the embedded line numbers in this listing are not
 * contiguous, so the declaration of ptr, the handling of a failed search
 * and the bodies of the conditionals below are not visible here.
 */
16 void extract_key(char *target, char *source, char *key)
19 char looking_for[256];
20 int double_quotes = 0;
/* Build the search pattern "<key>="; snprintf() truncates it to fit looking_for. */
22 snprintf(looking_for, sizeof looking_for, "%s=", key);
/* bmstrcasestr() is defined elsewhere; presumably a case-insensitive substring search -- TODO confirm. */
24 ptr = bmstrcasestr(source, looking_for);
/*
 * Copy everything after the matched "<key>=" into target.
 * NOTE(review): strcpy() is unbounded, so target must be large enough to
 * hold the remainder of source; confirm every caller guarantees that.
 */
29 strcpy(target, (ptr + strlen(looking_for)));
/* Scan the copied value in place, one character at a time, for its terminator. */
31 for (ptr=target; (*ptr != 0); ++ptr) {
33 /* A semicolon means we've hit the end of the key, unless we're inside double quotes */
34 if ( (double_quotes != 1) && (*ptr == ';')) {
38 /* if we find double quotes, we've got a great set of string boundaries */
41 if (double_quotes == 1) {
53 * For non-multipart messages, we need to generate a quickie partnum of "1"
54 * to return to callback functions. Some callbacks demand it.
/*
 * fixed_partnum() - substitute a default part number when none was supplied.
 *
 * Returns supplied_partnum itself when it points to a non-empty string, and
 * the string literal "1" when it is NULL or empty.  Because the fallback is
 * a literal, callers must not write through or free the returned pointer.
 */
56 char *fixed_partnum(char *supplied_partnum) {
/* NULL is tested first so the strlen() below never receives a null pointer. */
57 if (supplied_partnum == NULL) return "1";
58 if (strlen(supplied_partnum)==0) return "1";
59 return supplied_partnum;
65 * Given a message or message-part body and a length, handle any necessary
66 * decoding and pass the request up the stack.
/*
 * mime_decode() - hand one message part to CallBack, decoding it first if
 * its transfer encoding requires it.
 *
 * Behaviour visible in this listing:
 *   - "7bit", "8bit" and "binary" are treated as "no encoding".
 *   - With an empty encoding, or when dont_decode is set, CallBack receives
 *     part_start and length unchanged.
 *   - "base64" and "quoted-printable" are decoded into a temporary buffer
 *     and CallBack receives that buffer, the decoded byte count and the
 *     encoding name "binary".
 *   - Per the existing comment, any other encoding fails silently.
 *
 * The part number given to CallBack always goes through fixed_partnum().
 *
 * NOTE(review): the embedded line numbers are not contiguous; most of the
 * parameter list, the early returns and the release of the decode buffer
 * are not visible here.
 */
68 void mime_decode(char *partnum,
69 char *part_start, size_t length,
70 char *content_type, char *charset, char *encoding,
72 char *name, char *filename,
84 void (*PreMultiPartCallBack)
95 void (*PostMultiPartCallBack)
112 size_t bytes_decoded = 0;
114 /* Some encodings aren't really encodings */
/* strcpy() writes through the encoding pointer, so the caller's buffer is modified. */
115 if (!strcasecmp(encoding, "7bit"))
116 strcpy(encoding, "");
117 if (!strcasecmp(encoding, "8bit"))
118 strcpy(encoding, "");
119 if (!strcasecmp(encoding, "binary"))
120 strcpy(encoding, "");
122 /* If this part is not encoded, send as-is */
123 if ( (strlen(encoding) == 0) || (dont_decode)) {
124 if (CallBack != NULL) {
125 CallBack(name, filename, fixed_partnum(partnum),
126 disposition, part_start,
127 content_type, charset, length, encoding, userdata);
132 /* Fail silently if we hit an unknown encoding. */
/* strcasecmp() returns non-zero on a mismatch, so this is true when encoding is neither name. */
133 if ((strcasecmp(encoding, "base64"))
134 && (strcasecmp(encoding, "quoted-printable"))) {
139 * Allocate a buffer for the decoded data. The output buffer is slightly
140 * larger than the input buffer; this assumes that the decoded data
141 * will never be significantly larger than the encoded data. This is a
142 * safe assumption with base64, uuencode, and quoted-printable.
/* NOTE(review): length + 32768 is not checked for size_t overflow before the allocation. */
144 decoded = malloc(length + 32768);
145 if (decoded == NULL) {
/* Both decoders are defined elsewhere; their return value is used as the decoded byte count. */
149 if (!strcasecmp(encoding, "base64")) {
150 bytes_decoded = CtdlDecodeBase64(decoded, part_start, length);
152 else if (!strcasecmp(encoding, "quoted-printable")) {
153 bytes_decoded = CtdlDecodeQuotedPrintable(decoded, part_start, length);
/* A decode result of zero bytes produces no callback at all. */
156 if (bytes_decoded > 0) if (CallBack != NULL) {
157 CallBack(name, filename, fixed_partnum(partnum),
158 disposition, decoded,
159 content_type, charset, bytes_decoded, "binary", userdata);
/* NOTE(review): the release of decoded is not visible in this listing; confirm it is freed on every path. */
166 * Break out the components of a multipart message
167 * (This function expects to be fed HEADERS + CONTENT)
168 * Note: NULL can be supplied as content_end; in this case, the message is
169 * considered to have ended when the parser encounters a 0x00 byte.
/*
 * the_mime_parser() - recursive worker that parses one MIME entity
 * (headers followed by content).
 *
 * Phases visible in this listing:
 *   1. Allocate and zero SIZ-byte scratch buffers for the header fields.
 *   2. Read header lines until an empty line or the end of input, picking
 *      out Content-type, Content-Disposition, Content-length and
 *      Content-transfer-encoding.
 *   3. If a boundary parameter was found, split the body at "--<boundary>"
 *      markers and recurse into each part, bracketed by
 *      PreMultiPartCallBack and PostMultiPartCallBack.
 *   4. Otherwise hand the body to the decoder and, for message/rfc822,
 *      recurse into the encapsulated message.
 *   5. end_parser: release the scratch buffers.
 *
 * partnum        part number of this entity; the nested part number is
 *                built differently for an empty and a non-empty value
 * content_start  first byte of headers + content
 * content_end    end pointer, or NULL to measure with strlen()
 *
 * NOTE(review): the embedded line numbers are not contiguous, so several
 * parameters, local declarations, braces and whole statements are not
 * visible here.  Comments below describe only what the visible lines show.
 */
171 void the_mime_parser(char *partnum,
172 char *content_start, char *content_end,
184 void (*PreMultiPartCallBack)
195 void (*PostMultiPartCallBack)
213 char *part_start, *part_end = NULL;
218 size_t startary_len = 0;
/*
 * NOTE(review): no initial value for content_length is visible; the
 * truncation check in the non-multipart path reads it even when no
 * Content-length header was seen, so confirm it is zeroed first.
 */
223 size_t content_length;
227 char *content_type_name;
228 char *content_disposition_name;
234 char nested_partnum[256];
236 char *evaluate_crlf_ptr = NULL;
/*
 * Scratch buffers, SIZ bytes each, zero-filled.
 * NOTE(review): none of the visible lines checks a malloc() result before
 * the memset(); a failed allocation would be dereferenced.
 */
243 boundary = malloc(SIZ);
244 memset(boundary, 0, SIZ);
246 startary = malloc(SIZ);
247 memset(startary, 0, SIZ);
249 endary = malloc(SIZ);
250 memset(endary, 0, SIZ);
252 header = malloc(SIZ);
253 memset(header, 0, SIZ);
255 content_type = malloc(SIZ);
256 memset(content_type, 0, SIZ);
258 charset = malloc(SIZ);
259 memset(charset, 0, SIZ);
261 encoding = malloc(SIZ);
262 memset(encoding, 0, SIZ);
264 content_type_name = malloc(SIZ);
265 memset(content_type_name, 0, SIZ);
267 content_disposition_name = malloc(SIZ);
268 memset(content_disposition_name, 0, SIZ);
270 filename = malloc(SIZ);
271 memset(filename, 0, SIZ);
273 disposition = malloc(SIZ);
274 memset(disposition, 0, SIZ);
276 /* If the caller didn't supply an endpointer, generate one by measuring the string */
277 if (content_end == NULL) {
278 content_end = &content_start[strlen(content_start)];
281 /* Learn interesting things from the headers */
/*
 * Each pass reads one line into buf.  The accumulated text in header is
 * examined only when buf does not start with whitespace, and buf is then
 * appended to header further down; this appears to be how folded
 * (continuation) header lines are joined -- TODO confirm against the
 * lines missing from this listing.
 */
285 ptr = memreadlinelen(ptr, buf, SIZ, &buflen);
286 if (ptr >= content_end) {
/* NOTE(review): isspace() receives buf[i] directly; if buf is a plain char array, bytes above 0x7f should be cast to unsigned char -- confirm buf's type. */
290 for (i = 0; i < buflen; ++i) {
291 if (isspace(buf[i])) {
296 if (!isspace(buf[0])) {
297 if (!strncasecmp(header, "Content-type:", 13)) {
298 strcpy(content_type, &header[13]);
299 striplt(content_type);
/* name and charset come from the copied value; boundary is searched for in the whole header line. */
300 extract_key(content_type_name, content_type, "name");
301 extract_key(charset, content_type, "charset");
302 extract_key(boundary, header, "boundary");
303 /* Deal with weird headers */
/* Cut the type at the first space or semicolon so only the bare media type remains. */
304 if (strchr(content_type, ' '))
305 *(strchr(content_type, ' ')) = '\0';
306 if (strchr(content_type, ';'))
307 *(strchr(content_type, ';')) = '\0';
309 if (!strncasecmp(header, "Content-Disposition:", 20)) {
310 strcpy(disposition, &header[20]);
311 striplt(disposition);
312 extract_key(content_disposition_name, disposition, "name");
313 extract_key(filename, disposition, "filename");
/* Only the first 15 characters are compared, so the trailing space in the literal is not required to match. */
315 if (!strncasecmp(header, "Content-length: ", 15)) {
317 safestrncpy(clbuf, &header[15], sizeof clbuf);
/* NOTE(review): atol() reports no errors and a negative value becomes a very large size_t after the cast. */
319 content_length = (size_t) atol(clbuf);
/* Likewise only 26 characters are compared, which excludes the trailing space. */
321 if (!strncasecmp(header, "Content-transfer-encoding: ", 26)) {
322 strcpy(encoding, &header[26]);
/* Append buf to header only while the result still fits in the SIZ-byte buffer. */
328 if ((headerlen + buflen + 2) < SIZ) {
329 memcpy(&header[headerlen], buf, buflen);
331 header[headerlen] = '\0';
/* The header section ends at the first empty line or at a 0x00 byte. */
333 } while ((!IsEmptyStr(buf)) && (*ptr != 0));
/* Drop any parameters after the first semicolon and trim the remaining value. */
335 if (strchr(disposition, ';'))
336 *(strchr(disposition, ';')) = '\0';
337 striplt(disposition);
338 if (strchr(content_type, ';'))
339 *(strchr(content_type, ';')) = '\0';
340 striplt(content_type);
342 if (!IsEmptyStr(boundary)) {
348 /* If this is a multipart message, then recursively process it */
352 /* Tell the client about this message's multipartedness */
353 if (PreMultiPartCallBack != NULL) {
354 PreMultiPartCallBack("", "", partnum, "",
355 NULL, content_type, charset,
356 0, encoding, userdata);
359 /* Figure out where the boundaries are */
/* startary is the part delimiter "--<boundary>"; endary is the closing delimiter "--<boundary>--". */
360 snprintf(startary, SIZ, "--%s", boundary);
361 snprintf(endary, SIZ, "--%s--", boundary);
362 startary_len = strlen(startary);
366 next_boundary = NULL;
/*
 * Byte-by-byte search for the next delimiter.
 * NOTE(review): memcmp() reads startary_len bytes starting at srch, which
 * may extend past content_end when srch is close to the end of the buffer.
 */
367 for (srch=ptr; srch<content_end; ++srch) {
368 if (!memcmp(srch, startary, startary_len)) {
369 next_boundary = srch;
/* A part is complete only when both its start and the following delimiter are known. */
374 if ( (part_start != NULL) && (next_boundary != NULL) ) {
375 part_end = next_boundary;
376 --part_end; /* omit the trailing LF */
378 --part_end; /* omit the trailing CR */
/* Build the child's part number; the format strings are not visible in this listing. */
381 if (!IsEmptyStr(partnum)) {
382 snprintf(nested_partnum,
383 sizeof nested_partnum,
388 snprintf(nested_partnum,
389 sizeof nested_partnum,
392 the_mime_parser(nested_partnum,
393 part_start, part_end,
395 PreMultiPartCallBack,
396 PostMultiPartCallBack,
401 if (next_boundary != NULL) {
402 /* If we pass out of scope, don't attempt to
403 * read past the end boundary. */
/* NOTE(review): strcmp() compares up to the terminating 0x00, so this matches only when nothing follows the closing delimiter -- confirm that is intended. */
404 if (!strcmp(next_boundary, endary)) {
408 /* Set up for the next part. */
/* NOTE(review): strstr() returns NULL when no newline follows the delimiter; confirm the missing lines guard the memcmp() below. */
409 part_start = strstr(next_boundary, "\n");
411 /* Determine whether newlines are LF or CRLF */
412 evaluate_crlf_ptr = part_start;
414 if (!memcmp(evaluate_crlf_ptr, "\r\n", 2)) {
421 /* Advance past the LF ... now we're in the next part */
427 /* Invalid end of multipart. Bail out! */
430 } while ( (ptr < content_end) && (next_boundary != NULL) );
432 if (PostMultiPartCallBack != NULL) {
433 PostMultiPartCallBack("", "", partnum, "", NULL,
434 content_type, charset, 0, encoding, userdata);
/*
 * Non-multipart path, as far as the visible lines show:
 *   - the part runs to content_end, and its length is capped at
 *     content_length when that header supplied a smaller positive value;
 *   - name is taken from whichever of the Content-disposition and
 *     Content-type "name" parameters is longer;
 *   - the part is passed on with CallBack and NULL for both multipart
 *     callbacks (the head of that call is not visible; the argument list
 *     matches mime_decode());
 *   - for content type message/rfc822 the parser recurses into the
 *     encapsulated message between the Pre/Post multipart callbacks.
 */
439 /* If it's not a multipart message, then do something with it */
443 while (ptr < content_end) {
447 part_end = content_end;
450 * I thought there was an off-by-one error here, but there isn't.
451 * This probably means that there's an off-by-one error somewhere
452 * else ... or maybe only in certain messages?
457 /* Truncate if the header told us to */
458 if ( (content_length > 0) && (length > content_length) ) {
459 length = content_length;
462 /* Sometimes the "name" field is tacked on to Content-type,
463 * and sometimes it's tacked on to Content-disposition. Use
464 * whichever one we have.
466 if (strlen(content_disposition_name) > strlen(content_type_name)) {
467 name = content_disposition_name;
470 name = content_type_name;
473 /* lprintf(CTDL_DEBUG, "mime_decode part=%s, len=%d, type=%s, charset=%s, encoding=%s\n",
474 partnum, length, content_type, charset, encoding); */
476 /* Ok, we've got a non-multipart part here, so do something with it.
480 content_type, charset, encoding, disposition,
482 CallBack, NULL, NULL,
483 userdata, dont_decode
487 * Now if it's an encapsulated message/rfc822 then we have to recurse into it
489 if (!strcasecmp(content_type, "message/rfc822")) {
491 if (PreMultiPartCallBack != NULL) {
492 PreMultiPartCallBack("", "", partnum, "",
493 NULL, content_type, charset,
494 0, encoding, userdata);
496 if (CallBack != NULL) {
497 if (strlen(partnum) > 0) {
498 snprintf(nested_partnum,
499 sizeof nested_partnum,
504 snprintf(nested_partnum,
505 sizeof nested_partnum,
508 the_mime_parser(nested_partnum,
509 part_start, part_end,
511 PreMultiPartCallBack,
512 PostMultiPartCallBack,
517 if (PostMultiPartCallBack != NULL) {
518 PostMultiPartCallBack("", "", partnum, "", NULL,
519 content_type, charset, 0, encoding, userdata);
527 end_parser: /* free the buffers! end the oppression!! */
/* Only two of the free() calls are visible in this listing; confirm every buffer allocated above is released here. */
535 free(content_type_name);
536 free(content_disposition_name);
544 * Entry point for the MIME parser.
545 * (This function expects to be fed HEADERS + CONTENT)
546 * Note: NULL can be supplied as content_end; in this case, the message is
547 * considered to have ended when the parser encounters a 0x00 byte.
/*
 * mime_parser() - public entry point of the MIME parser.
 *
 * Forwards content_start, content_end, the multipart callbacks, userdata
 * and dont_decode to the_mime_parser().
 *
 * NOTE(review): the embedded line numbers are not contiguous; most of the
 * parameter list and the braces are not visible in this listing.
 */
549 void mime_parser(char *content_start,
564 void (*PreMultiPartCallBack)
576 void (*PostMultiPartCallBack)
/* The part number passed down is the empty string. */
593 the_mime_parser("", content_start, content_end,
595 PreMultiPartCallBack,
596 PostMultiPartCallBack,
597 userdata, dont_decode);