4 * This is the MIME parser for Citadel.
6 * Copyright (c) 1998-2007 by the citadel.org development team.
7 * This code is distributed under the GNU General Public License v3.
15 #include <sys/types.h>
21 #include "xdgmime/xdgmime.h"
22 #include "libcitadel.h"
/*
 * Pull the value of a key=value parameter out of a header-style string.
 *
 * target - output buffer; it is filled with strcpy(), so it must be large
 *          enough to hold everything in source that follows the key
 * source - text to search
 * key    - parameter name without the trailing equals sign
 *
 * NOTE(review): only part of this function is visible in this excerpt; in
 * particular the handling of a failed bmstrcasestr() search is not shown.
 */
24 void extract_key(char *target, char *source, char *key)
27 char looking_for[256];
28 int double_quotes = 0;
/* Build the search token: the key followed by an equals sign */
30 snprintf(looking_for, sizeof looking_for, "%s=", key);
/* presumably a case-insensitive substring search - confirm against bmstrcasestr() */
32 ptr = bmstrcasestr(source, looking_for);
/* Copy everything after the token; the loop below walks that copy to find where the value ends */
37 strcpy(target, (ptr + strlen(looking_for)));
39 for (ptr=target; (*ptr != 0); ++ptr) {
41 /* A semicolon means we've hit the end of the key, unless we're inside double quotes */
42 if ( (double_quotes != 1) && (*ptr == ';')) {
46 /* if we find double quotes, we've got a great set of string boundaries */
49 if (double_quotes == 1) {
/*
 * For non-multipart messages, we need to generate a quickie partnum of "1"
 * to return to callback functions.  Some callbacks demand it.
 *
 * supplied_partnum - part number string from the parser; may be NULL or empty
 *
 * Returns supplied_partnum itself when it is a non-empty string, otherwise a
 * pointer to the string literal "1".  The fallback is a literal, so callers
 * must not modify or free the returned pointer.
 */
char *fixed_partnum(char *supplied_partnum) {
	/* Looking at the first byte answers "is it empty?" without a full strlen() */
	if (supplied_partnum == NULL || supplied_partnum[0] == '\0') {
		return "1";
	}
	return supplied_partnum;
}
/*
 * Map one hexadecimal digit (either case) to its value.
 * Returns -1 for any byte that is not a hex digit.
 */
static int qp_hex_digit(unsigned char c) {
	if (c >= '0' && c <= '9') return c - '0';
	if (c >= 'A' && c <= 'F') return c - 'A' + 10;
	if (c >= 'a' && c <= 'f') return c - 'a' + 10;
	return -1;
}

/*
 * Convert "quoted-printable" to binary, according to RFC2045 section 6.7.
 *
 * decoded   - output buffer; needs room for sourcelen + 1 bytes, because the
 *             output is never longer than the input and a terminating NUL
 *             is appended
 * encoded   - quoted-printable input; does not have to be NUL-terminated
 * sourcelen - number of input bytes that may be read
 *
 * Returns the number of bytes decoded (not counting the terminating NUL).
 *
 * Every read stays inside encoded[0 .. sourcelen-1].  Soft line breaks
 * ("=" followed by CRLF or a bare LF) are dropped, "=XY" with two hex digits
 * becomes a single byte, and a malformed or truncated escape is copied
 * through literally, which is the lenient handling RFC2045 suggests for
 * robust decoders.
 */
int CtdlDecodeQuotedPrintable(char *decoded, char *encoded, int sourcelen) {
	int decoded_length = 0;
	int pos = 0;

	while (pos < sourcelen) {
		int remaining = sourcelen - pos;

		/* Ordinary byte: copy it through */
		if (encoded[pos] != '=') {
			decoded[decoded_length++] = encoded[pos++];
			continue;
		}

		/* Soft line break, CRLF flavour */
		if (remaining >= 3 && encoded[pos + 1] == '\r' && encoded[pos + 2] == '\n') {
			pos += 3;
			continue;
		}

		/* Soft line break, bare LF flavour */
		if (remaining >= 2 && encoded[pos + 1] == '\n') {
			pos += 2;
			continue;
		}

		/* Hex escape: both digits must exist inside the input and be valid */
		if (remaining >= 3) {
			int hi = qp_hex_digit((unsigned char) encoded[pos + 1]);
			int lo = qp_hex_digit((unsigned char) encoded[pos + 2]);

			if (hi >= 0 && lo >= 0) {
				decoded[decoded_length++] = (char) ((hi << 4) | lo);
				pos += 3;
				continue;
			}
		}

		/* Malformed or truncated escape: keep the '=' as literal data */
		decoded[decoded_length++] = encoded[pos++];
	}

	decoded[decoded_length] = 0;
	return decoded_length;
}
110 * Given a message or message-part body and a length, handle any necessary
111 * decoding and pass the request up the stack.
/*
 * Decode one message part according to its transfer encoding and hand the
 * result to CallBack.
 *
 * - An encoding of 7bit, 8bit or binary is treated as no encoding at all:
 *   the encoding buffer is overwritten with an empty string, so the caller
 *   must pass writable storage.
 * - When the encoding is empty or dont_decode is set, CallBack receives
 *   part_start and length unchanged.
 * - base64 and quoted-printable parts are decoded into a freshly allocated
 *   buffer of length + 32768 bytes.  CallBack is invoked only when at least
 *   one byte was decoded, and is told that the encoding is binary.
 * - In every CallBack invocation the part number goes through
 *   fixed_partnum(), so a NULL or empty partnum is reported as 1.
 *
 * NOTE(review): the full parameter list and the release of the decode
 * buffer are not visible in this excerpt.
 */
113 void mime_decode(char *partnum,
114 char *part_start, size_t length,
115 char *content_type, char *charset, char *encoding,
117 char *name, char *filename,
129 void (*PreMultiPartCallBack)
140 void (*PostMultiPartCallBack)
157 size_t bytes_decoded = 0;
159 /* Some encodings aren't really encodings */
160 if (!strcasecmp(encoding, "7bit"))
161 strcpy(encoding, "");
162 if (!strcasecmp(encoding, "8bit"))
163 strcpy(encoding, "");
164 if (!strcasecmp(encoding, "binary"))
165 strcpy(encoding, "");
167 /* If this part is not encoded, send as-is */
168 if ( (strlen(encoding) == 0) || (dont_decode)) {
169 if (CallBack != NULL) {
170 CallBack(name, filename, fixed_partnum(partnum),
171 disposition, part_start,
172 content_type, charset, length, encoding, userdata);
177 /* Fail silently if we hit an unknown encoding. */
178 if ((strcasecmp(encoding, "base64"))
179 && (strcasecmp(encoding, "quoted-printable"))) {
184 * Allocate a buffer for the decoded data. The output buffer is slightly
185 * larger than the input buffer; this assumes that the decoded data
186 * will never be significantly larger than the encoded data. This is a
187 * safe assumption with base64, uuencode, and quoted-printable.
189 decoded = malloc(length + 32768);
190 if (decoded == NULL) {
/* Pick the decoder that matches the (case-insensitive) encoding name */
194 if (!strcasecmp(encoding, "base64")) {
195 bytes_decoded = CtdlDecodeBase64(decoded, part_start, length);
197 else if (!strcasecmp(encoding, "quoted-printable")) {
198 bytes_decoded = CtdlDecodeQuotedPrintable(decoded, part_start, length);
/* A part that decodes to zero bytes is not reported to the callback */
201 if (bytes_decoded > 0) if (CallBack != NULL) {
202 CallBack(name, filename, fixed_partnum(partnum),
203 disposition, decoded,
204 content_type, charset, bytes_decoded, "binary", userdata);
211 * Break out the components of a multipart message
212 * (This function expects to be fed HEADERS + CONTENT)
213 * Note: NULL can be supplied as content_end; in this case, the message is
214 * considered to have ended when the parser encounters a 0x00 byte.
/*
 * Recursive worker behind mime_parser().  Expects headers followed by
 * content starting at content_start; a NULL content_end is replaced by
 * content_start + strlen(content_start).
 *
 * The headers are scanned for Content-type, Content-Disposition,
 * Content-length and Content-transfer-encoding.  If a boundary parameter
 * was found, the body is split at each boundary line and every part is fed
 * back into this function with a nested part number.  Otherwise the body is
 * handed on together with CallBack (presumably through mime_decode() -
 * the call itself is only partly visible), and a message/rfc822 part is
 * additionally parsed as an embedded message.  PreMultiPartCallBack and
 * PostMultiPartCallBack, when non-NULL, are called before and after each
 * level of recursion.
 *
 * NOTE(review): much of this function is not visible in this excerpt; the
 * description above covers only what the visible lines show.
 */
216 void the_mime_parser(char *partnum,
217 char *content_start, char *content_end,
229 void (*PreMultiPartCallBack)
240 void (*PostMultiPartCallBack)
258 char *part_start, *part_end = NULL;
263 size_t startary_len = 0;
268 size_t content_length;
272 char *content_type_name;
273 char *content_disposition_name;
279 char nested_partnum[256];
281 char *evaluate_crlf_ptr = NULL;
/*
 * Scratch buffers of SIZ bytes each, zero-filled.
 * NOTE(review): each malloc() result is passed straight to memset()
 * without a NULL check.
 */
288 boundary = malloc(SIZ);
289 memset(boundary, 0, SIZ);
291 startary = malloc(SIZ);
292 memset(startary, 0, SIZ);
294 endary = malloc(SIZ);
295 memset(endary, 0, SIZ);
297 header = malloc(SIZ);
298 memset(header, 0, SIZ);
300 content_type = malloc(SIZ);
301 memset(content_type, 0, SIZ);
303 charset = malloc(SIZ);
304 memset(charset, 0, SIZ);
306 encoding = malloc(SIZ);
307 memset(encoding, 0, SIZ);
309 content_type_name = malloc(SIZ);
310 memset(content_type_name, 0, SIZ);
312 content_disposition_name = malloc(SIZ);
313 memset(content_disposition_name, 0, SIZ);
315 filename = malloc(SIZ);
316 memset(filename, 0, SIZ);
318 disposition = malloc(SIZ);
319 memset(disposition, 0, SIZ);
321 /* If the caller didn't supply an endpointer, generate one by measure */
322 if (content_end == NULL) {
323 content_end = &content_start[strlen(content_start)];
326 /* Learn interesting things from the headers */
330 ptr = memreadlinelen(ptr, buf, SIZ, &buflen);
331 if (ptr >= content_end) {
335 for (i = 0; i < buflen; ++i) {
336 if (isspace(buf[i])) {
/* A line that does not start with whitespace begins a new header, so the accumulated one is examined now */
341 if (!isspace(buf[0])) {
342 if (!strncasecmp(header, "Content-type:", 13)) {
343 strcpy(content_type, &header[13]);
344 striplt(content_type);
345 extract_key(content_type_name, content_type, "name");
346 extract_key(charset, content_type, "charset");
347 extract_key(boundary, header, "boundary");
348 /* Deal with weird headers */
349 if (strchr(content_type, ' '))
350 *(strchr(content_type, ' ')) = '\0';
351 if (strchr(content_type, ';'))
352 *(strchr(content_type, ';')) = '\0';
354 if (!strncasecmp(header, "Content-Disposition:", 20)) {
355 strcpy(disposition, &header[20]);
356 striplt(disposition);
357 extract_key(content_disposition_name, disposition, "name");
358 extract_key(filename, disposition, "filename");
/* NOTE(review): only the first 15 characters are compared, so the trailing space in the literal is not part of the match */
360 if (!strncasecmp(header, "Content-length: ", 15)) {
362 safestrncpy(clbuf, &header[15], sizeof clbuf);
364 content_length = (size_t) atol(clbuf);
/* NOTE(review): only the first 26 characters are compared, so the trailing space in the literal is not part of the match */
366 if (!strncasecmp(header, "Content-transfer-encoding: ", 26)) {
367 strcpy(encoding, &header[26]);
/* Append this line to the accumulated header only while it still fits in SIZ bytes */
373 if ((headerlen + buflen + 2) < SIZ) {
374 memcpy(&header[headerlen], buf, buflen);
376 header[headerlen] = '\0';
378 } while ((!IsEmptyStr(buf)) && (*ptr != 0));
/* Cut disposition and content_type at the first semicolon so only the bare value remains */
380 if (strchr(disposition, ';'))
381 *(strchr(disposition, ';')) = '\0';
382 striplt(disposition);
383 if (strchr(content_type, ';'))
384 *(strchr(content_type, ';')) = '\0';
385 striplt(content_type);
387 if (!IsEmptyStr(boundary)) {
393 /* If this is a multipart message, then recursively process it */
397 /* Tell the client about this message's multipartedness */
398 if (PreMultiPartCallBack != NULL) {
399 PreMultiPartCallBack("", "", partnum, "",
400 NULL, content_type, charset,
401 0, encoding, userdata);
404 /* Figure out where the boundaries are */
405 snprintf(startary, SIZ, "--%s", boundary);
406 snprintf(endary, SIZ, "--%s--", boundary);
407 startary_len = strlen(startary);
411 next_boundary = NULL;
/* Scan forward one byte at a time for the next occurrence of the start boundary */
412 for (srch=ptr; srch<content_end; ++srch) {
413 if (!memcmp(srch, startary, startary_len)) {
414 next_boundary = srch;
419 if ( (part_start != NULL) && (next_boundary != NULL) ) {
420 part_end = next_boundary;
421 --part_end; /* omit the trailing LF */
423 --part_end; /* omit the trailing CR */
426 if (!IsEmptyStr(partnum)) {
427 snprintf(nested_partnum,
428 sizeof nested_partnum,
433 snprintf(nested_partnum,
434 sizeof nested_partnum,
437 the_mime_parser(nested_partnum,
438 part_start, part_end,
440 PreMultiPartCallBack,
441 PostMultiPartCallBack,
446 if (next_boundary != NULL) {
447 /* If we pass out of scope, don't attempt to
448 * read past the end boundary. */
/* NOTE(review): strcmp() compares through the terminating NUL, so this matches only when the closing boundary is the very last text in the buffer - confirm intent */
449 if (!strcmp(next_boundary, endary)) {
453 /* Set up for the next part. */
454 part_start = strstr(next_boundary, "\n");
456 /* Determine whether newlines are LF or CRLF */
457 evaluate_crlf_ptr = part_start;
459 if (!memcmp(evaluate_crlf_ptr, "\r\n", 2)) {
466 /* Advance past the LF ... now we're in the next part */
472 /* Invalid end of multipart. Bail out! */
475 } while ( (ptr < content_end) && (next_boundary != NULL) );
477 if (PostMultiPartCallBack != NULL) {
478 PostMultiPartCallBack("", "", partnum, "", NULL,
479 content_type, charset, 0, encoding, userdata);
484 /* If it's not a multipart message, then do something with it */
488 while (ptr < content_end) {
492 part_end = content_end;
495 * I thought there was an off-by-one error here, but there isn't.
496 * This probably means that there's an off-by-one error somewhere
497 * else ... or maybe only in certain messages?
502 /* Truncate if the header told us to */
503 if ( (content_length > 0) && (length > content_length) ) {
504 length = content_length;
507 /* Sometimes the "name" field is tacked on to Content-type,
508 * and sometimes it's tacked on to Content-disposition. Use
509 * whichever one we have.
511 if (strlen(content_disposition_name) > strlen(content_type_name)) {
512 name = content_disposition_name;
515 name = content_type_name;
518 /* Ok, we've got a non-multipart part here, so do something with it.
522 content_type, charset, encoding, disposition,
524 CallBack, NULL, NULL,
525 userdata, dont_decode
529 * Now if it's an encapsulated message/rfc822 then we have to recurse into it
531 if (!strcasecmp(content_type, "message/rfc822")) {
533 if (PreMultiPartCallBack != NULL) {
534 PreMultiPartCallBack("", "", partnum, "",
535 NULL, content_type, charset,
536 0, encoding, userdata);
538 if (CallBack != NULL) {
539 if (strlen(partnum) > 0) {
540 snprintf(nested_partnum,
541 sizeof nested_partnum,
546 snprintf(nested_partnum,
547 sizeof nested_partnum,
550 the_mime_parser(nested_partnum,
551 part_start, part_end,
553 PreMultiPartCallBack,
554 PostMultiPartCallBack,
559 if (PostMultiPartCallBack != NULL) {
560 PostMultiPartCallBack("", "", partnum, "", NULL,
561 content_type, charset, 0, encoding, userdata);
569 end_parser: /* free the buffers! end the oppression!! */
577 free(content_type_name);
578 free(content_disposition_name);
586 * Entry point for the MIME parser.
587 * (This function expects to be fed HEADERS + CONTENT)
588 * Note: NULL can be supplied as content_end; in this case, the message is
589 * considered to have ended when the parser encounters a 0x00 byte.
/*
 * Public entry point of the MIME parser.  Starts the recursive parse by
 * calling the_mime_parser() with an empty part number and passing
 * content_start, content_end, the multipart callbacks, userdata and
 * dont_decode through to it.
 *
 * NOTE(review): the full parameter list is not visible in this excerpt.
 */
591 void mime_parser(char *content_start,
606 void (*PreMultiPartCallBack)
618 void (*PostMultiPartCallBack)
/* The top level has no part number yet, hence the empty string */
635 the_mime_parser("", content_start, content_end,
637 PreMultiPartCallBack,
638 PostMultiPartCallBack,
639 userdata, dont_decode);
/*
 * One entry of the signature table consulted by GuessMimeType().
 * NOTE(review): most fields are not visible in this excerpt; GuessMimeType()
 * reads Pattern, PatternOffset and PatternLen from each entry.
 */
647 typedef struct _MimeGuess {
/* MIME type string that GuessMimeType() returns when this entry matches */
651 const char *MimeString;
/* Signature table; GuessMimeType() stops at the first entry whose PatternLen is 0 */
654 MimeGuess MyMimes [] = {
682 const char *GuessMimeType(char *data, size_t dlen)
686 while (MyMimes[MimeIndex].PatternLen != 0)
688 if ((MyMimes[MimeIndex].PatternLen +
689 MyMimes[MimeIndex].PatternOffset < dlen) &&
690 strncmp(MyMimes[MimeIndex].Pattern,
691 &data[MyMimes[MimeIndex].PatternOffset],
692 MyMimes[MimeIndex].PatternLen) == 0)
694 return MyMimes[MimeIndex].MimeString;
699 * ok, our simple minded algorythm didn't find anything,
700 * let the big chegger try it, he wil default to application/octet-stream
702 return (xdg_mime_get_mime_type_for_data(data, dlen));
/*
 * Guess a MIME type from the extension of a file name.
 *
 * what - file name (NUL-terminated; it is also handed to xdgmime)
 * len  - length of what in bytes
 *
 * A short list of hardcoded extensions is tried first, compared without
 * regard to case against the last bytes of the name.  An extension is only
 * compared when the name is at least as long as the extension, so short or
 * empty names never cause a read in front of the buffer.  Anything not in
 * the list is left to xdg_mime_get_mime_type_from_file_name().
 *
 * Returns a pointer to a constant MIME type string.
 */
const char* GuessMimeByFilename(const char *what, size_t len)
{
	/* we know some hardcoded on our own, try them... (order = lookup order) */
	static const struct {
		const char *suffix;
		size_t suffix_len;
		const char *mime_type;
	} known_suffixes[] = {
		{ ".gif",   4, "image/gif" },
		{ ".js",    3, "text/javascript" },
		{ ".txt",   4, "text/plain" },
		{ ".css",   4, "text/css" },
		{ ".jpg",   4, "image/jpeg" },
		{ ".png",   4, "image/png" },
		{ ".ico",   4, "image/x-icon" },
		{ ".html",  5, "text/html" },
		{ ".htm",   4, "text/html" },
		{ ".wml",   4, "text/vnd.wap.wml" },
		{ ".wmls",  5, "text/vnd.wap.wmlscript" },
		{ ".wmlc",  5, "application/vnd.wap.wmlc" },
		{ ".wmlsc", 6, "application/vnd.wap.wmlscriptc" },
		{ ".wbmp",  5, "image/vnd.wap.wbmp" },
	};
	size_t i;

	for (i = 0; i < sizeof known_suffixes / sizeof known_suffixes[0]; ++i) {
		size_t slen = known_suffixes[i].suffix_len;

		/* len - slen would wrap around for names shorter than the suffix */
		if (len >= slen &&
		    !strncasecmp(&what[len - slen], known_suffixes[i].suffix, slen)) {
			return known_suffixes[i].mime_type;
		}
	}

	/* and let xdgmime do the fallback. */
	return xdg_mime_get_mime_type_from_file_name(what);
}