4 * This is the MIME parser for Citadel.
6 * Copyright (c) 1998-2007 by the citadel.org development team.
7 * This code is distributed under the GNU General Public License v3.
15 #include <sys/types.h>
19 #include <sys/types.h>
23 #include "xdgmime/xdgmime.h"
24 #include "libcitadel.h"
25 #include "libcitadellocal.h"
/*
 * extract_key(): copy the value that follows "<key>=" in source into target.
 *
 * target  receives the extracted value
 * source  header text to search (located with bmstrcasestr())
 * key     parameter name, without the trailing '='
 *
 * After the copy, target is scanned for a ';' that is not inside double
 * quotes, which marks the end of the value.
 *
 * NOTE(review): this listing omits several lines of the function (the
 * declaration of ptr, the not-found path, the loop bodies), so the exact
 * truncation and quote handling cannot be confirmed from here.
 * NOTE(review): the strcpy() into target is unbounded; target has to be at
 * least as large as the remainder of source -- confirm at the call sites.
 */
27 void extract_key(char *target, char *source, char *key)
30 char looking_for[256];
31 int double_quotes = 0;
33 snprintf(looking_for, sizeof looking_for, "%s=", key);
35 ptr = bmstrcasestr(source, looking_for);
40 strcpy(target, (ptr + strlen(looking_for)));
42 for (ptr=target; (*ptr != 0); ++ptr) {
44 /* A semicolon means we've hit the end of the key, unless we're inside double quotes */
45 if ( (double_quotes != 1) && (*ptr == ';')) {
49 /* if we find double quotes, we've got a great set of string boundaries */
52 if (double_quotes == 1) {
64 * For non-multipart messages, we need to generate a quickie partnum of "1"
65 * to return to callback functions. Some callbacks demand it.
/*
 * fixed_partnum(): return supplied_partnum unchanged, or the string literal
 * "1" when supplied_partnum is NULL or empty.  The fallback is a literal, so
 * callers must not modify or free the returned pointer.
 */
67 char *fixed_partnum(char *supplied_partnum) {
68 if (supplied_partnum == NULL) return "1";
69 if (strlen(supplied_partnum)==0) return "1";
70 return supplied_partnum;
76 * Convert "quoted-printable" to binary according to RFC 2045 section 6.7.
77 * Returns the number of bytes decoded.
/*
 * CtdlDecodeQuotedPrintable(): walk encoded[0 .. sourcelen) and write the
 * decoded bytes to decoded, followed by a terminating 0 byte.
 *
 * decoded    output buffer supplied by the caller
 * encoded    quoted-printable input
 * sourcelen  number of input bytes to consume
 *
 * Returns decoded_length, the number of bytes written before the terminator.
 *
 * Visible handling per position: "=\r\n" and "=\n" are tested first
 * (presumably soft line breaks that are skipped -- the branch bodies are not
 * shown in this listing), a remaining '=' has the following hex digits
 * parsed with sscanf("%02x"), and any other byte is copied as is.
 *
 * NOTE(review): strncmp() and sscanf() look ahead from encoded[pos] without
 * checking sourcelen, so they rely on readable (NUL-terminated) bytes after
 * the input -- confirm.
 * NOTE(review): presumably decoded needs room for sourcelen + 1 bytes.
 */
79 int CtdlDecodeQuotedPrintable(char *decoded, char *encoded, int sourcelen) {
81 int decoded_length = 0;
84 while (pos < sourcelen)
86 if (!strncmp(&encoded[pos], "=\r\n", 3))
90 else if (!strncmp(&encoded[pos], "=\n", 2))
94 else if (encoded[pos] == '=')
97 sscanf(&encoded[pos+1], "%02x", &ch);
99 decoded[decoded_length++] = ch;
103 decoded[decoded_length++] = encoded[pos];
107 decoded[decoded_length] = 0;
108 return(decoded_length);
113 * Given a message or message-part body and a length, handle any necessary
114 * decoding and pass the request up the stack.
/*
 * mime_decode(): hand one message part to CallBack, decoding it first when
 * its transfer encoding requires it.
 *
 *  - "7bit", "8bit" and "binary" are normalized to an empty encoding; this
 *    overwrites the caller's encoding buffer in place.
 *  - With an empty encoding, or when dont_decode is set, CallBack receives
 *    part_start and length untouched.
 *  - Encodings other than "base64" and "quoted-printable" are ignored.
 *  - Otherwise the part is decoded into a malloc'ed buffer of
 *    length + 32768 bytes and CallBack receives the decoded data with the
 *    encoding reported as "binary", but only if at least one byte was
 *    decoded.
 *
 * A NULL CallBack is tolerated on both paths.
 *
 * NOTE(review): the full parameter list and the return/free statements are
 * only partly visible in this listing; releasing the decode buffer is
 * presumably done in the elided lines -- confirm.
 */
116 void mime_decode(char *partnum,
117 char *part_start, size_t length,
118 char *content_type, char *charset, char *encoding,
121 char *name, char *filename,
134 void (*PreMultiPartCallBack)
146 void (*PostMultiPartCallBack)
164 size_t bytes_decoded = 0;
166 /* Some encodings aren't really encodings */
167 if (!strcasecmp(encoding, "7bit"))
168 strcpy(encoding, "");
169 if (!strcasecmp(encoding, "8bit"))
170 strcpy(encoding, "");
171 if (!strcasecmp(encoding, "binary"))
172 strcpy(encoding, "");
174 /* If this part is not encoded, send as-is */
175 if ( (strlen(encoding) == 0) || (dont_decode)) {
176 if (CallBack != NULL) {
177 CallBack(name, filename, fixed_partnum(partnum),
178 disposition, part_start,
179 content_type, charset, length, encoding, id, userdata);
184 /* Fail silently if we hit an unknown encoding. */
185 if ((strcasecmp(encoding, "base64"))
186 && (strcasecmp(encoding, "quoted-printable"))) {
191 * Allocate a buffer for the decoded data. The output buffer is slightly
192 * larger than the input buffer; this assumes that the decoded data
193 * will never be significantly larger than the encoded data. This is a
194 * safe assumption with base64, uuencode, and quoted-printable.
196 decoded = malloc(length + 32768);
197 if (decoded == NULL) {
201 if (!strcasecmp(encoding, "base64")) {
202 bytes_decoded = CtdlDecodeBase64(decoded, part_start, length);
204 else if (!strcasecmp(encoding, "quoted-printable")) {
205 bytes_decoded = CtdlDecodeQuotedPrintable(decoded, part_start, length);
208 if (bytes_decoded > 0) if (CallBack != NULL) {
209 CallBack(name, filename, fixed_partnum(partnum),
210 disposition, decoded,
211 content_type, charset, bytes_decoded, "binary", id, userdata);
218 * Break out the components of a multipart message
219 * (This function expects to be fed HEADERS + CONTENT)
220 * Note: NULL can be supplied as content_end; in this case, the message is
221 * considered to have ended when the parser encounters a 0x00 byte.
/*
 * the_mime_parser(): recursive worker that splits HEADERS + CONTENT into
 * parts and reports them through the supplied callbacks.
 *
 * Outline, as far as this listing shows:
 *  1. Allocate and zero SIZ-byte work buffers (boundary, startary, endary,
 *     header, content_type, charset, encoding, content_type_name,
 *     content_disposition_name, filename, disposition).
 *  2. If content_end is NULL, use the end of the NUL-terminated input.
 *  3. Read header lines with memreadlinelen() and pick out Content-type
 *     (name, charset, boundary), Content-Disposition (name, filename),
 *     Content-ID, Content-length and Content-transfer-encoding.
 *  4. If a boundary was found: call PreMultiPartCallBack, locate each
 *     "--boundary" delimiter, recurse into every part using nested_partnum,
 *     then call PostMultiPartCallBack.
 *  5. Otherwise: pass the body on for decoding (presumably via
 *     mime_decode(); the call line itself is not shown) and, for
 *     message/rfc822, recurse into the encapsulated message between a
 *     Pre/PostMultiPartCallBack pair.
 *  6. end_parser: release the work buffers.
 *
 * NOTE(review): the malloc() results are handed to memset() without a
 * visible NULL check.
 * NOTE(review): the strcpy() calls out of header rely on header staying
 * below SIZ (the headerlen + buflen + 2 < SIZ test); the size of id is not
 * visible here -- confirm.
 * NOTE(review): strcmp(next_boundary, endary) compares endary with the whole
 * remainder of the buffer, not only the delimiter -- verify this is intended.
 * NOTE(review): many lines of this function are missing from the listing, so
 * the outline above is necessarily incomplete.
 */
223 void the_mime_parser(char *partnum,
224 char *content_start, char *content_end,
237 void (*PreMultiPartCallBack)
249 void (*PostMultiPartCallBack)
268 char *part_start, *part_end = NULL;
273 size_t startary_len = 0;
278 size_t content_length;
283 char *content_type_name;
284 char *content_disposition_name;
290 char nested_partnum[256];
292 char *evaluate_crlf_ptr = NULL;
299 boundary = malloc(SIZ);
300 memset(boundary, 0, SIZ);
302 startary = malloc(SIZ);
303 memset(startary, 0, SIZ);
305 endary = malloc(SIZ);
306 memset(endary, 0, SIZ);
308 header = malloc(SIZ);
309 memset(header, 0, SIZ);
311 content_type = malloc(SIZ);
312 memset(content_type, 0, SIZ);
314 charset = malloc(SIZ);
315 memset(charset, 0, SIZ);
317 encoding = malloc(SIZ);
318 memset(encoding, 0, SIZ);
320 content_type_name = malloc(SIZ);
321 memset(content_type_name, 0, SIZ);
323 content_disposition_name = malloc(SIZ);
324 memset(content_disposition_name, 0, SIZ);
326 filename = malloc(SIZ);
327 memset(filename, 0, SIZ);
329 disposition = malloc(SIZ);
330 memset(disposition, 0, SIZ);
335 /* If the caller didn't supply an endpointer, generate one by measure */
336 if (content_end == NULL) {
337 content_end = &content_start[strlen(content_start)];
340 /* Learn interesting things from the headers */
344 ptr = memreadlinelen(ptr, buf, SIZ, &buflen);
345 if (ptr >= content_end) {
349 for (i = 0; i < buflen; ++i) {
350 if (isspace(buf[i])) {
355 if (!isspace(buf[0])) {
356 if (!strncasecmp(header, "Content-type:", 13)) {
357 strcpy(content_type, &header[13]);
358 striplt(content_type);
359 extract_key(content_type_name, content_type, "name");
360 extract_key(charset, content_type, "charset");
361 extract_key(boundary, header, "boundary");
362 /* Deal with weird headers */
363 if (strchr(content_type, ' '))
364 *(strchr(content_type, ' ')) = '\0';
365 if (strchr(content_type, ';'))
366 *(strchr(content_type, ';')) = '\0';
368 if (!strncasecmp(header, "Content-Disposition:", 20)) {
369 strcpy(disposition, &header[20]);
370 striplt(disposition);
371 extract_key(content_disposition_name, disposition, "name");
372 extract_key(filename, disposition, "filename");
374 if (!strncasecmp(header, "Content-ID:", 11)) {
375 strcpy(id, &header[11]);
377 stripallbut(id, '<', '>');
379 if (!strncasecmp(header, "Content-length: ", 15)) {
381 safestrncpy(clbuf, &header[15], sizeof clbuf);
383 content_length = (size_t) atol(clbuf);
385 if (!strncasecmp(header, "Content-transfer-encoding: ", 26)) {
386 strcpy(encoding, &header[26]);
392 if ((headerlen + buflen + 2) < SIZ) {
393 memcpy(&header[headerlen], buf, buflen);
395 header[headerlen] = '\0';
397 } while ((!IsEmptyStr(buf)) && (*ptr != 0));
399 if (strchr(disposition, ';'))
400 *(strchr(disposition, ';')) = '\0';
401 striplt(disposition);
402 if (strchr(content_type, ';'))
403 *(strchr(content_type, ';')) = '\0';
404 striplt(content_type);
406 if (!IsEmptyStr(boundary)) {
412 /* If this is a multipart message, then recursively process it */
416 /* Tell the client about this message's multipartedness */
417 if (PreMultiPartCallBack != NULL) {
418 PreMultiPartCallBack("", "", partnum, "",
419 NULL, content_type, charset,
420 0, encoding, id, userdata);
423 /* Figure out where the boundaries are */
424 snprintf(startary, SIZ, "--%s", boundary);
425 snprintf(endary, SIZ, "--%s--", boundary);
426 startary_len = strlen(startary);
430 next_boundary = NULL;
431 for (srch=ptr; srch<content_end; ++srch) {
432 if (!memcmp(srch, startary, startary_len)) {
433 next_boundary = srch;
438 if ( (part_start != NULL) && (next_boundary != NULL) ) {
439 part_end = next_boundary;
440 --part_end; /* omit the trailing LF */
442 --part_end; /* omit the trailing CR */
445 if (!IsEmptyStr(partnum)) {
446 snprintf(nested_partnum,
447 sizeof nested_partnum,
452 snprintf(nested_partnum,
453 sizeof nested_partnum,
456 the_mime_parser(nested_partnum,
457 part_start, part_end,
459 PreMultiPartCallBack,
460 PostMultiPartCallBack,
465 if (next_boundary != NULL) {
466 /* If we pass out of scope, don't attempt to
467 * read past the end boundary. */
468 if (!strcmp(next_boundary, endary)) {
472 /* Set up for the next part. */
473 part_start = strstr(next_boundary, "\n");
475 /* Determine whether newlines are LF or CRLF */
476 evaluate_crlf_ptr = part_start;
478 if (!memcmp(evaluate_crlf_ptr, "\r\n", 2)) {
485 /* Advance past the LF ... now we're in the next part */
491 /* Invalid end of multipart. Bail out! */
494 } while ( (ptr < content_end) && (next_boundary != NULL) );
496 if (PostMultiPartCallBack != NULL) {
497 PostMultiPartCallBack("", "", partnum, "", NULL,
498 content_type, charset, 0, encoding, id, userdata);
503 /* If it's not a multipart message, then do something with it */
507 while (ptr < content_end) {
511 part_end = content_end;
514 /* The following code will truncate the MIME part to the size
515 * specified by the Content-length: header. We have commented it
516 * out because these headers have a tendency to be wrong.
518 * if ( (content_length > 0) && (length > content_length) ) {
519 * length = content_length;
523 /* Sometimes the "name" field is tacked on to Content-type,
524 * and sometimes it's tacked on to Content-disposition. Use
525 * whichever one we have.
527 if (strlen(content_disposition_name) > strlen(content_type_name)) {
528 name = content_disposition_name;
531 name = content_type_name;
534 /* Ok, we've got a non-multipart part here, so do something with it.
538 content_type, charset, encoding, disposition, id,
540 CallBack, NULL, NULL,
541 userdata, dont_decode
545 * Now if it's an encapsulated message/rfc822 then we have to recurse into it
547 if (!strcasecmp(content_type, "message/rfc822")) {
549 if (PreMultiPartCallBack != NULL) {
550 PreMultiPartCallBack("", "", partnum, "",
551 NULL, content_type, charset,
552 0, encoding, id, userdata);
554 if (CallBack != NULL) {
555 if (strlen(partnum) > 0) {
556 snprintf(nested_partnum,
557 sizeof nested_partnum,
562 snprintf(nested_partnum,
563 sizeof nested_partnum,
566 the_mime_parser(nested_partnum,
567 part_start, part_end,
569 PreMultiPartCallBack,
570 PostMultiPartCallBack,
575 if (PostMultiPartCallBack != NULL) {
576 PostMultiPartCallBack("", "", partnum, "", NULL,
577 content_type, charset, 0, encoding, id, userdata);
585 end_parser: /* free the buffers! end the oppression!! */
593 free(content_type_name);
594 free(content_disposition_name);
603 * Entry point for the MIME parser.
604 * (This function expects to be fed HEADERS + CONTENT)
605 * Note: NULL can be supplied as content_end; in this case, the message is
606 * considered to have ended when the parser encounters a 0x00 byte.
/*
 * mime_parser(): public entry point.  Forwards to the_mime_parser() with an
 * empty part number ("") plus the caller's content range, callbacks,
 * userdata and dont_decode flag.
 *
 * NOTE(review): parts of the parameter list are not visible in this listing.
 */
608 void mime_parser(char *content_start,
624 void (*PreMultiPartCallBack)
637 void (*PostMultiPartCallBack)
655 the_mime_parser("", content_start, content_end,
657 PreMultiPartCallBack,
658 PostMultiPartCallBack,
659 userdata, dont_decode);
/*
 * MimeGuess: one entry of the signature table scanned by GuessMimeType(),
 * which reads the members PatternLen, PatternOffset, Pattern and MimeString.
 * Only the MimeString declaration is visible in this listing.
 *
 * MyMimes[]: the table itself.  GuessMimeType() stops at the first entry
 * whose PatternLen is 0, so the table has to end with such an entry
 * (the initializer is not shown here).
 */
667 typedef struct _MimeGuess {
671 const char *MimeString;
674 MimeGuess MyMimes [] = {
/*
 * GuessMimeType(): guess a MIME type from the leading bytes of data.
 *
 * data  buffer to inspect
 * dlen  number of valid bytes in data
 *
 * MyMimes[] entries are tried in order until one with PatternLen == 0 is
 * reached.  An entry matches when PatternOffset + PatternLen is below dlen
 * and strncmp() finds Pattern at data[PatternOffset]; its MimeString is then
 * returned.  Without a match the result of
 * xdg_mime_get_mime_type_for_data() is returned.
 *
 * NOTE(review): the fit test uses '<', so a pattern ending exactly at dlen
 * is never considered -- confirm whether '<=' was intended.
 * NOTE(review): strncmp() stops at a 0 byte, which matters if any pattern
 * contains one -- check against the table.
 */
702 const char *GuessMimeType(const char *data, size_t dlen)
706 while (MyMimes[MimeIndex].PatternLen != 0)
708 if ((MyMimes[MimeIndex].PatternLen +
709 MyMimes[MimeIndex].PatternOffset < dlen) &&
710 strncmp(MyMimes[MimeIndex].Pattern,
711 &data[MyMimes[MimeIndex].PatternOffset],
712 MyMimes[MimeIndex].PatternLen) == 0)
714 return MyMimes[MimeIndex].MimeString;
719 * ok, our simple minded algorythm didn't find anything,
720 * let the big chegger try it, he wil default to application/octet-stream
722 return (xdg_mime_get_mime_type_for_data(data, dlen));
/*
 * GuessMimeByFilename(): guess a MIME type from the suffix of a file name.
 *
 * what  file name
 * len   length of what in bytes
 *
 * A fixed list of extensions is compared against the end of what with
 * strncasecmp(); each test first checks that len is at least the suffix
 * length, so what[len - n] stays inside the name.  Names that match none of
 * them are handed to xdg_mime_get_mime_type_from_file_name().
 *
 * NOTE(review): several of the return statements are missing from this
 * listing, so the full extension-to-type mapping cannot be read off here.
 */
726 const char* GuessMimeByFilename(const char *what, size_t len)
728 /* we know some hardcoded on our own, try them... */
729 if ((len > 3) && !strncasecmp(&what[len - 4], ".gif", 4))
731 else if ((len > 2) && !strncasecmp(&what[len - 3], ".js", 3))
732 return "text/javascript";
733 else if ((len > 3) && !strncasecmp(&what[len - 4], ".txt", 4))
735 else if ((len > 3) && !strncasecmp(&what[len - 4], ".css", 4))
737 else if ((len > 3) && !strncasecmp(&what[len - 4], ".jpg", 4))
739 else if ((len > 3) && !strncasecmp(&what[len - 4], ".png", 4))
741 else if ((len > 3) && !strncasecmp(&what[len - 4], ".ico", 4))
742 return "image/x-icon";
743 else if ((len > 3) && !strncasecmp(&what[len - 4], ".vcf", 4))
744 return "text/x-vcard";
745 else if ((len > 4) && !strncasecmp(&what[len - 5], ".html", 5))
747 else if ((len > 3) && !strncasecmp(&what[len - 4], ".htm", 4))
749 else if ((len > 3) && !strncasecmp(&what[len - 4], ".wml", 4))
750 return "text/vnd.wap.wml";
751 else if ((len > 4) && !strncasecmp(&what[len - 5], ".wmls", 5))
752 return "text/vnd.wap.wmlscript";
753 else if ((len > 4) && !strncasecmp(&what[len - 5], ".wmlc", 5))
754 return "application/vnd.wap.wmlc";
755 else if ((len > 5) && !strncasecmp(&what[len - 6], ".wmlsc", 6))
756 return "application/vnd.wap.wmlscriptc";
757 else if ((len > 4) && !strncasecmp(&what[len - 5], ".wbmp", 5))
758 return "image/vnd.wap.wbmp";
760 /* and let xdgmime do the fallback. */
761 return xdg_mime_get_mime_type_from_file_name(what);
/*
 * IconHash: file-scope icon table.  It is created and filled by
 * LoadIconDir(), queried by GetIconFilename() and released by
 * ShutDownLibCitadelMime().
 *
 * IconName: entry type stored in that table; the code in this file uses its
 * FlatName and FileName members (the struct body is not shown in this
 * listing).
 */
764 static HashList *IconHash = NULL;
766 typedef struct IconName IconName;
/*
 * DeleteIcon(): destructor for an IconName entry; LoadIconDir() passes it to
 * Put().  Frees the entry's FlatName and FileName strings.
 *
 * NOTE(review): releasing the IconName struct itself is presumably done in a
 * line not shown in this listing -- confirm.
 */
773 static void DeleteIcon(void *IconNamePtr)
775 IconName *Icon = (IconName*) IconNamePtr;
776 free(Icon->FlatName);
777 free(Icon->FileName);
/*
 * PrintFlat(): return the FlatName of the IconName entry passed as void *.
 * Within this listing it is referenced only by commented-out PrintHash()
 * debugging calls.
 */
782 static const char *PrintFlat(void *IconNamePtr)
784 IconName *Icon = (IconName*) IconNamePtr;
785 return Icon->FlatName;
/*
 * PrintFile(): return the FileName of the IconName entry passed as void *.
 * Within this listing it is referenced only by commented-out PrintHash()
 * debugging calls.
 */
787 static const char *PrintFile(void *IconNamePtr)
789 IconName *Icon = (IconName*) IconNamePtr;
790 return Icon->FileName;
/*
 * GENSTR / IGNORE_PREFIX_1: name fragments that LoadIconDir() treats
 * specially (a generic minor type and a prefix to strip).
 *
 * LoadIconDir(): scan the directory DirName and register one IconName per
 * usable entry in IconHash.
 *
 * For each directory entry the visible code:
 *  - takes the name length (d_namelen or strlen) and searches backwards for
 *    a '.' to find the extension-less length d_without_ext;
 *  - tests for names without a '.' or shorter than 3 characters (presumably
 *    skipped; the branch body is not shown);
 *  - if the name starts with IGNORE_PREFIX_1, skips sizeof(IGNORE_PREFIX_1)
 *    characters, i.e. the prefix plus the character after it;
 *  - stores the full name as FileName and the stripped name as FlatName;
 *  - looks for '-' in FlatName: a trailing GENSTR minor part is cut off,
 *    otherwise (per the comment below) the separator is mapped to '/';
 *  - stores the entry with Put() under FlatName, with DeleteIcon as its
 *    destructor.
 *
 * NOTE(review): IconHash is created unconditionally and before the opendir()
 * result is checked, so a repeated call would replace the existing table --
 * confirm callers invoke this once.
 * NOTE(review): the malloc() results are used without a visible NULL check.
 * NOTE(review): return values and closedir() are not visible in this listing.
 */
794 #define GENSTR "x-generic"
795 #define IGNORE_PREFIX_1 "gnome-mime"
796 int LoadIconDir(const char *DirName)
799 struct dirent *filedir_entry;
804 filedir = opendir (DirName);
805 IconHash = NewHash(1, NULL);
806 if (filedir == NULL) {
810 while ((filedir_entry = readdir(filedir)))
814 #ifdef _DIRENT_HAVE_D_NAMELEN
815 d_namelen = filedir_entry->d_namelen;
817 d_namelen = strlen(filedir_entry->d_name);
819 d_without_ext = d_namelen;
820 while ((d_without_ext > 0) && (filedir_entry->d_name[d_without_ext] != '.'))
822 if ((d_without_ext == 0) || (d_namelen < 3))
825 if ((sizeof(IGNORE_PREFIX_1) < d_namelen) &&
826 (strncmp(IGNORE_PREFIX_1,
827 filedir_entry->d_name,
828 sizeof(IGNORE_PREFIX_1) - 1) == 0)) {
829 PStart = filedir_entry->d_name + sizeof(IGNORE_PREFIX_1);
830 d_without_ext -= sizeof(IGNORE_PREFIX_1);
833 PStart = filedir_entry->d_name;
835 Icon = malloc(sizeof(IconName));
837 Icon->FileName = malloc(d_namelen + 1);
838 memcpy(Icon->FileName, filedir_entry->d_name, d_namelen + 1);
840 Icon->FlatName = malloc(d_without_ext + 1);
841 memcpy(Icon->FlatName, PStart, d_without_ext);
842 Icon->FlatName[d_without_ext] = '\0';
843 /* Try to find Minor type in image-jpeg */
844 MinorPtr = strchr(Icon->FlatName, '-');
845 if (MinorPtr != NULL) {
847 MinorLen = 1 + d_without_ext - (MinorPtr - Icon->FlatName + 1);
848 if ((MinorLen == sizeof(GENSTR)) &&
849 (strncmp(MinorPtr + 1, GENSTR, sizeof(GENSTR)) == 0)) {
850 /* ok, we found a generic filename. cut the generic. */
852 d_without_ext = d_without_ext - (MinorPtr - Icon->FlatName);
854 else { /* Map the major / minor separator to / */
859 // PrintHash(IconHash, PrintFlat, PrintFile);
860 // printf("%s - %s\n", Icon->FlatName, Icon->FileName);
861 Put(IconHash, Icon->FlatName, d_without_ext, Icon, DeleteIcon);
862 // PrintHash(IconHash, PrintFlat, PrintFile);
/*
 * GetIconFilename(): look up an icon file name for a MIME type.
 *
 * MimeType  type string used as the IconHash key
 * len       number of bytes of MimeType to use as the key
 *
 * The full type is looked up first.  The fallback shown below repeats the
 * lookup with only the part of MimeType before '/', i.e. the major type.
 * The FileName of the entry found is returned.
 *
 * NOTE(review): the conditions guarding the fallback and the value returned
 * when nothing is found are not visible in this listing.
 */
868 const char *GetIconFilename(char *MimeType, size_t len)
876 GetHash(IconHash, MimeType, len, &vIcon), Icon = (IconName*) vIcon;
877 /* didn't find the exact mimetype? try major only. */
880 pMinor = strchr(MimeType, '/');
881 if (pMinor != NULL) {
883 GetHash(IconHash, MimeType, pMinor - MimeType, &vIcon),
884 Icon = (IconName*) vIcon;
891 /*printf("Getting: [%s] == [%s] -> [%s]\n", MimeType, Icon->FlatName, Icon->FileName);*/
892 return Icon->FileName;
/*
 * ShutDownLibCitadelMime(): release the icon table by passing &IconHash to
 * DeleteHash().
 */
895 void ShutDownLibCitadelMime(void)
897 DeleteHash(&IconHash);