Looks like I need to remove some optimization cancer from mime_parser.c, but first...
[citadel.git] / libcitadel / lib / mime_parser.c
1 // This is the MIME parser for Citadel.
2 //
3 // Copyright (c) 1998-2023 by the citadel.org development team.
4 //
5 // This program is open source software.  Use, duplication, or disclosure
6 // is subject to the terms of the GNU General Public License, version 3.
7
8 #include <stdlib.h>
9 #include <unistd.h>
10 #include <stdio.h>
11 #include <signal.h>
12 #include <sys/types.h>
13 #include <ctype.h>
14 #include <string.h>
15 #include <sys/stat.h>
16 #include <sys/types.h>
17 #include <dirent.h>
18 #include <errno.h>
19
20 #include "xdgmime/xdgmime.h"
21 #include "libcitadel.h"
22 #include "libcitadellocal.h"
23
// Lookup table mapping a character code to the value of the hexadecimal
// digit it represents ('0'-'9', 'A'-'F', 'a'-'f').  Every other character
// maps to 0xFF, which serves as the "not a hex digit" marker.
// Laid out 16 entries per row, so the row label is the high nibble of the
// character code and the column is the low nibble.
const unsigned char FromHexTable[256] = {
	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x00
	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x10
	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x20
	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x30  '0'..'9'
	0xFF, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x40  'A'..'F'
	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x50
	0xFF, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x60  'a'..'f'
	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x70
	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x80
	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x90
	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0xA0
	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0xB0
	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0xC0
	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0xD0
	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0xE0
	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF  // 0xF0
};
52
53
// Extract the value of a "key=value" parameter from a MIME header line.
//
// target     receives the NUL-terminated value, or an empty string when the
//            key is not present.  It must be large enough to hold everything
//            in source that follows the key, and must not overlap source.
// source     NUL-terminated header text to search
// sourcelen  length of source
// key        parameter name to look for (located with bmstrcasestr_len(),
//            presumably a case-insensitive search -- confirm in libcitadel)
// keylen     length of key
// KeyEnd     character that must follow the key, after optional whitespace
//            (callers in this file pass '=')
//
// A value enclosed in double quotes is returned without the quotes and may
// contain semicolons; an unquoted value ends at the first semicolon.
//
// Returns the length of the string stored in target (0 if the key is absent).
long extract_key(char *target, char *source, long sourcelen, char *key, long keylen, char KeyEnd) {
	char *sptr = source;
	char *ptr = NULL;
	char *value = NULL;
	int double_quotes = 0;

	// Find an occurrence of the key that is actually followed by KeyEnd.
	while ((sptr != NULL) && (value == NULL)) {
		ptr = bmstrcasestr_len(sptr, sourcelen - (sptr - source), key, keylen);
		if (ptr == NULL) {
			sptr = NULL;
		}
		else {
			// The whitespace skip is restarted for every candidate match so
			// that a rejected match cannot skew the offset of a later one.
			char *after = ptr + keylen;
			while (isspace((unsigned char) *after)) {
				after++;
			}
			if (*after == KeyEnd) {
				value = after + 1;
			}
			else if (*ptr == '\0') {
				sptr = NULL;		// nothing left to search
			}
			else {
				sptr = ptr + 1;		// never steps past the terminator
			}
		}
	}

	if (value == NULL) {
		*target = '\0';
		return 0;
	}
	strcpy(target, value);

	// Trim the copied text down to the value itself.
	ptr = target;
	while (*ptr != '\0') {
		if (*ptr == '\"') {
			if (double_quotes == 0) {
				// Opening quote: drop it.  memmove() is required because the
				// source and destination overlap.
				double_quotes = 1;
				memmove(ptr, ptr + 1, strlen(ptr + 1) + 1);
				continue;		// re-examine the character that slid into place
			}
			break;				// closing quote ends the value
		}
		if ((double_quotes == 0) && (*ptr == ';')) {
			break;				// unquoted semicolon ends the value
		}
		ptr++;
	}
	*ptr = '\0';
	return ptr - target;
}
104
105
// Callback functions expect a part number even for non-multipart messages,
// so substitute "1" whenever the caller has none (NULL or empty string).
// Otherwise the supplied pointer is returned unchanged.
char *fixed_partnum(char *supplied_partnum) {
	int have_partnum = (supplied_partnum != NULL) && (supplied_partnum[0] != '\0');

	return have_partnum ? supplied_partnum : "1";
}
113
114
115 static inline unsigned int _decode_hex(const char *Source) {
116         unsigned int ret = '?';
117         unsigned char LO_NIBBLE;
118         unsigned char HI_NIBBLE;
119
120         HI_NIBBLE = FromHexTable[(unsigned char) *Source];
121         LO_NIBBLE = FromHexTable[(unsigned char) *(Source+1)];
122         
123         if ((LO_NIBBLE == 0xFF) || (LO_NIBBLE == 0xFF))
124                 return ret;
125         ret = HI_NIBBLE;
126         ret = ret << 4;
127         ret = ret | LO_NIBBLE;
128         return ret;
129 }
130
// Externally visible wrapper around _decode_hex(): decodes the two hex
// digits at Source and returns whatever _decode_hex() returns.
unsigned int decode_hex(char *Source) {
	return _decode_hex(Source);
}
132
133
// Convert "quoted-printable" to binary, according to RFC2045 section 6.7.
//
// decoded    output buffer; needs room for sourcelen + 1 bytes (every input
//            byte produces at most one output byte, plus the terminator)
// encoded    quoted-printable input
// sourcelen  number of input bytes to consume; nothing at or beyond
//            encoded[sourcelen] is read
//
// Returns the number of bytes decoded.  The output is also NUL-terminated.
int CtdlDecodeQuotedPrintable(char *decoded, char *encoded, int sourcelen) {
	int decoded_length = 0;
	int pos = 0;

	while (pos < sourcelen) {
		// Anything other than '=' is literal data.
		if (encoded[pos] != '=') {
			decoded[decoded_length++] = encoded[pos++];
			continue;
		}

		pos++;				// step over the '='
		if (pos >= sourcelen) {
			break;			// input ends right after '=': nothing to join or decode
		}

		if (encoded[pos] == '\n') {
			pos++;			// soft line break (LF)
		}
		else if (encoded[pos] == '\r') {
			pos++;			// soft line break (CR, optionally followed by LF)
			if ((pos < sourcelen) && (encoded[pos] == '\n')) {
				pos++;
			}
		}
		else if (pos + 1 < sourcelen) {
			decoded[decoded_length++] = (char) _decode_hex(&encoded[pos]);
			pos += 2;
		}
		else {
			// Truncated escape: only one character follows the '='.  Emit the
			// same '?' marker _decode_hex() uses for undecodable input.
			decoded[decoded_length++] = '?';
			pos = sourcelen;
		}
	}
	decoded[decoded_length] = 0;
	return(decoded_length);
}
166
167
168 // Given a message or message-part body and a length, handle any necessary
169 // decoding and pass the request up the stack.
170 void mime_decode(char *partnum,
171                  char *part_start, size_t length,
172                  char *content_type, char *charset, char *encoding,
173                  char *disposition,
174                  char *id,
175                  char *name, char *filename,
176                  MimeParserCallBackType CallBack,
177                  MimeParserCallBackType PreMultiPartCallBack,
178                  MimeParserCallBackType PostMultiPartCallBack,
179                  void *userdata,
180                  int dont_decode
181 ) {
182         char *decoded;
183         size_t bytes_decoded = 0;
184
185         // Some encodings aren't really encodings
186         if (!strcasecmp(encoding, "7bit"))
187                 *encoding = '\0';
188         if (!strcasecmp(encoding, "8bit"))
189                 *encoding = '\0';
190         if (!strcasecmp(encoding, "binary"))
191                 *encoding = '\0';
192         if (!strcasecmp(encoding, "ISO-8859-1"))
193                 *encoding = '\0';
194
195         // If this part is not encoded, send as-is
196         if ( (strlen(encoding) == 0) || (dont_decode)) {
197                 if (CallBack != NULL) {
198                         CallBack(name, 
199                                  filename, 
200                                  fixed_partnum(partnum),
201                                  disposition, 
202                                  part_start,
203                                  content_type, 
204                                  charset, 
205                                  length, 
206                                  encoding, 
207                                  id,
208                                  userdata);
209                         }
210                 return;
211         }
212         
213         // Fail silently if we hit an unknown encoding.
214         if ((strcasecmp(encoding, "base64")) && (strcasecmp(encoding, "quoted-printable"))) {
215                 return;
216         }
217
218         // Allocate a buffer for the decoded data.  The output buffer is slightly
219         // larger than the input buffer; this assumes that the decoded data
220         // will never be significantly larger than the encoded data.  This is a
221         // safe assumption with base64, uuencode, and quoted-printable.
222         decoded = malloc(length + 32768);
223         if (decoded == NULL) {
224                 return;
225         }
226
227         if (!strcasecmp(encoding, "base64")) {
228                 bytes_decoded = CtdlDecodeBase64(decoded, part_start, length);
229         }
230         else if (!strcasecmp(encoding, "quoted-printable")) {
231                 bytes_decoded = CtdlDecodeQuotedPrintable(decoded, part_start, length);
232         }
233
234         if (bytes_decoded > 0) if (CallBack != NULL) {
235                         char encoding_buf[SIZ];
236
237                         strcpy(encoding_buf, "binary");
238                         CallBack(name, 
239                                  filename, 
240                                  fixed_partnum(partnum),
241                                  disposition, 
242                                  decoded,
243                                  content_type, 
244                                  charset, 
245                                  bytes_decoded, 
246                                  encoding_buf, 
247                                  id, 
248                                  userdata);
249         }
250
251         free(decoded);
252 }
253
// This is the decoding half of mime_decode().  It can be called later for a
// part that was delivered with 'dont_decode' set, so the CPU-intensive
// decoding is deferred until the content is really wanted.
//
// part_start     start of the encoded body
// length         number of bytes in the body
// encoding       transfer encoding name; overwritten with an empty string
//                when it is "7bit", "8bit" or "binary"
// decoded        out: newly allocated buffer holding the decoded data, or NULL
// bytes_decoded  out: number of decoded bytes
//
// Returns:
//   - > 0 we decoded something; it is in *decoded and the caller must free it.
//   - = 0 no need to decode anything; *decoded is NULL.
//   - < 0 an error occurred, either an unknown encoding or a failed
//         allocation; *decoded is NULL and there is nothing to free.
int mime_decode_now (char *part_start, 
		     size_t length,
		     char *encoding,
		     char **decoded,
		     size_t *bytes_decoded)
{
	int is_base64;

	*bytes_decoded = 0;
	*decoded = NULL;

	// Some encodings aren't really encodings
	if (	!strcasecmp(encoding, "7bit")
		|| !strcasecmp(encoding, "8bit")
		|| !strcasecmp(encoding, "binary")
	) {
		*encoding = '\0';
	}

	// If this part is not encoded, there is nothing to do
	if (*encoding == '\0') {
		return 0;
	}

	// Fail if we hit an unknown encoding.
	is_base64 = !strcasecmp(encoding, "base64");
	if (!is_base64 && strcasecmp(encoding, "quoted-printable")) {
		return -1;
	}

	// The output buffer is the input size plus some slack, on the assumption
	// that decoded data is never significantly larger than encoded data.
	*decoded = malloc(length + 32768);
	if (*decoded == NULL) {		// test the allocation itself, not the out-parameter
		return -1;
	}

	if (is_base64) {
		*bytes_decoded = CtdlDecodeBase64(*decoded, part_start, length);
	}
	else {
		*bytes_decoded = CtdlDecodeQuotedPrintable(*decoded, part_start, length);
	}
	return 1;
}
307
// Slot numbers for interesting_mime_headers.b[]: one entry per piece of
// header information the parser keeps track of.
typedef enum _eIntMimeHdrs {
	boundary = 0,			// boundary= parameter of Content-type
	startary,			// "--" followed by the boundary
	endary,
	content_type,			// Content-type value
	charset,			// charset= parameter of Content-type
	encoding,			// Content-transfer-encoding value
	content_type_name,		// name= parameter of Content-type
	content_disposition_name,	// name= parameter of Content-Disposition
	filename,			// filename= parameter of Content-Disposition
	disposition,			// Content-Disposition value
	id,				// Content-ID value
	eMax				// number of slots -- must stay last, don't move!
} eIntMimeHdrs;
322
323 typedef struct _CBufStr {
324         char Key[SIZ];
325         long len;
326 }CBufStr;
327
328 typedef struct _interesting_mime_headers {
329         CBufStr b[eMax];
330         long content_length;
331         long is_multipart;
332 } interesting_mime_headers;
333
334
335 static void FlushInterestingMimes(interesting_mime_headers *m)
336 {
337         int i;
338         
339         for (i = 0; i < eMax; i++) {
340              m->b[i].Key[0] = '\0';
341              m->b[i].len = 0;
342         }
343         m->content_length = -1;
344 }
345 static interesting_mime_headers *InitInterestingMimes(void)
346 {
347         interesting_mime_headers *m;
348         m = (interesting_mime_headers*) malloc( sizeof(interesting_mime_headers));
349
350         FlushInterestingMimes(m);
351
352         return m;
353 }
354
355
356 static long parse_MimeHeaders(interesting_mime_headers *m, 
357                               char** pcontent_start, 
358                               char *content_end)
359 {
360         char buf[SIZ];
361         char header[SIZ];
362         long headerlen;
363         char *ptr, *pch;
364         int buflen = 0;
365         int i;
366
367         // Learn interesting things from the headers
368         ptr = *pcontent_start;
369         *header = '\0';
370         headerlen = 0;
371         do {
372                 ptr = memreadlinelen(ptr, buf, SIZ, &buflen);
373
374                 for (i = 0; i < buflen; ++i) {
375                         if (isspace(buf[i])) {
376                                 buf[i] = ' ';
377                         }
378                 }
379
380                 if (!isspace(buf[0]) && (headerlen > 0)) {
381                         if (!strncasecmp(header, "Content-type:", 13)) {
382                                 memcpy (m->b[content_type].Key, &header[13], headerlen - 12);
383                                 m->b[content_type].Key[headerlen - 12] = '\0';
384                                 m->b[content_type].len = string_trim (m->b[content_type].Key);
385
386                                 m->b[content_type_name].len = extract_key(m->b[content_type_name].Key, CKEY(m->b[content_type]), HKEY("name"), '=');
387                                 m->b[charset].len           = extract_key(m->b[charset].Key,           CKEY(m->b[content_type]), HKEY("charset"), '=');
388                                 m->b[boundary].len          = extract_key(m->b[boundary].Key,          header,       headerlen,  HKEY("boundary"), '=');
389
390                                 /* Deal with weird headers */
391                                 pch = strchr(m->b[content_type].Key, ' ');
392                                 if (pch != NULL) {
393                                         *pch = '\0';
394                                         m->b[content_type].len = m->b[content_type].Key - pch;
395                                 }
396                                 pch = strchr(m->b[content_type].Key, ';');
397                                 if (pch != NULL) {
398                                         *pch = '\0';
399                                         m->b[content_type].len = m->b[content_type].Key - pch;
400                                 }
401                         }
402                         else if (!strncasecmp(header, "Content-Disposition:", 20)) {
403                                 memcpy (m->b[disposition].Key, &header[20], headerlen - 19);
404                                 m->b[disposition].Key[headerlen - 19] = '\0';
405                                 m->b[disposition].len = string_trim(m->b[disposition].Key);
406
407                                 m->b[content_disposition_name].len = extract_key(m->b[content_disposition_name].Key, CKEY(m->b[disposition]), HKEY("name"), '=');
408                                 m->b[filename].len                 = extract_key(m->b[filename].Key,                 CKEY(m->b[disposition]), HKEY("filename"), '=');
409                                 pch = strchr(m->b[disposition].Key, ';');
410                                 if (pch != NULL) *pch = '\0';
411                                 m->b[disposition].len = string_trim(m->b[disposition].Key);
412                         }
413                         else if (!strncasecmp(header, "Content-ID:", 11)) {
414                                 memcpy(m->b[id].Key, &header[11], headerlen - 11);
415                                 m->b[id].Key[headerlen - 11] = '\0';
416                                 string_trim(m->b[id].Key);
417                                 m->b[id].len = stripallbut(m->b[id].Key, '<', '>');
418                         }
419                         else if (!strncasecmp(header, "Content-length: ", 15)) {
420                                 char *clbuf;
421                                 clbuf = &header[15];
422                                 while (isspace(*clbuf))
423                                         clbuf ++;
424                                 m->content_length = (size_t) atol(clbuf);
425                         }
426                         else if (!strncasecmp(header, "Content-transfer-encoding: ", 26)) {
427                                 memcpy(m->b[encoding].Key, &header[26], headerlen - 26);
428                                 m->b[encoding].Key[headerlen - 26] = '\0';
429                                 m->b[encoding].len = string_trim(m->b[encoding].Key);
430                         }
431                         *header = '\0';
432                         headerlen = 0;
433                 }
434                 if ((headerlen + buflen + 2) < SIZ) {
435                         memcpy(&header[headerlen], buf, buflen);
436                         headerlen += buflen;
437                         header[headerlen] = '\0';
438                 }
439                 if (ptr >= content_end) {
440                         return -1;
441                 }
442         } while ((!IsEmptyStr(buf)) && (*ptr != 0));
443
444         m->is_multipart = m->b[boundary].len != 0;
445         *pcontent_start = ptr;
446
447         return 0;
448 }
449
450
451 static int IsAsciiEncoding(interesting_mime_headers *m) {
452
453         if ((m->b[encoding].len != 0) &&
454             (strcasecmp(m->b[encoding].Key, "base64") == 0))
455                 return 1;
456         if ((m->b[encoding].len != 0) &&
457             (strcmp(m->b[encoding].Key, "quoted-printable") == 0))
458                 return 1;
459
460         return 0;
461 }
462
463 static char *FindNextContent(char *ptr,
464                              char *content_end,
465                              interesting_mime_headers *SubMimeHeaders,
466                              interesting_mime_headers *m)
467 {
468         char *next_boundary;
469         char  tmp;
470
471         if (IsAsciiEncoding(SubMimeHeaders)) {
472                 tmp = *content_end;
473                 *content_end = '\0';
474
475                 // ok, if we have a content length of the mime part, 
476                 // try skipping the content on the search for the next
477                 // boundary. since we don't trust the content_length
478                 // to be all accurate, and suspect it to lose one digit 
479                 // per line with a line length of 80 chars, we need 
480                 // to start searching a little before..
481                                    
482                 if ((SubMimeHeaders->content_length != -1) && (SubMimeHeaders->content_length > 10)) {
483                         char *pptr;
484                         long lines;
485                                         
486                         lines = SubMimeHeaders->content_length / 80;
487                         pptr = ptr + SubMimeHeaders->content_length - lines - 10;
488                         if (pptr < content_end)
489                                 ptr = pptr;
490                 }
491                         
492                 next_boundary = strstr(ptr, m->b[startary].Key);
493                 *content_end = tmp;
494         }
495         else {
496                 char *srch;
497                 // ok, if we have a content length of the mime part, 
498                 // try skipping the content on the search for the next
499                 // boundary. since we don't trust the content_length
500                 // to be all accurate, start searching a little before..
501                                    
502                 if ((SubMimeHeaders->content_length != -1) && (SubMimeHeaders->content_length > 10)) {
503                         char *pptr;
504                         pptr = ptr + SubMimeHeaders->content_length - 10;
505                         if (pptr < content_end)
506                                 ptr = pptr;
507                 }
508                 
509                 srch = next_boundary = NULL;
510                 for (srch = memchr(ptr, '-',  content_end - ptr);
511                      (srch != NULL) && (srch < content_end); 
512                      srch = memchr(srch, '-',  content_end - srch)) 
513                 {
514                         if (!memcmp(srch, m->b[startary].Key, m->b[startary].len)) {
515                                 next_boundary = srch;
516                                 srch = content_end;
517                         }
518                         else srch ++;
519
520                 }
521
522         }
523         return next_boundary;
524 }
525
526
527 // Break out the components of a multipart message
528 // (This function expects to be fed HEADERS + CONTENT)
529 // Note: NULL can be supplied as content_end; in this case, the message is
530 // considered to have ended when the parser encounters a 0x00 byte.
531 static void recurseable_mime_parser(char *partnum,
532                                     char *content_start, char *content_end,
533                                     MimeParserCallBackType CallBack,
534                                     MimeParserCallBackType PreMultiPartCallBack,
535                                     MimeParserCallBackType PostMultiPartCallBack,
536                                     void *userdata,
537                                     int dont_decode, 
538                                     interesting_mime_headers *m)
539 {
540         interesting_mime_headers *SubMimeHeaders;
541         char     *ptr;
542         char     *part_start;
543         char     *part_end = NULL;
544         char     *evaluate_crlf_ptr = NULL;
545         char     *next_boundary;
546         char      nested_partnum[256];
547         int       crlf_in_use = 0;
548         int       part_seq = 0;
549         CBufStr  *chosen_name;
550
551
552         // If this is a multipart message, then recursively process it
553         ptr = content_start;
554         part_start = NULL;
555         if (m->is_multipart) {
556
557                 // Tell the client about this message's multipartedness
558                 if (PreMultiPartCallBack != NULL) {
559                         PreMultiPartCallBack("", 
560                                              "", 
561                                              partnum, 
562                                              "",
563                                              NULL, 
564                                              m->b[content_type].Key, 
565                                              m->b[charset].Key,
566                                              0, 
567                                              m->b[encoding].Key, 
568                                              m->b[id].Key, 
569                                              userdata);
570                 }
571
572                 // Figure out where the boundaries are
573                 m->b[startary].len = snprintf(m->b[startary].Key, SIZ, "--%s", m->b[boundary].Key);
574                 SubMimeHeaders = InitInterestingMimes ();
575
576                 while ((*ptr == '\r') || (*ptr == '\n')) {
577                         ptr++;
578                 }
579
580                 if (strncmp(ptr, m->b[startary].Key, m->b[startary].len) == 0) {
581                         ptr += m->b[startary].len;
582                 }
583
584                 while ((*ptr == '\r') || (*ptr == '\n')) {
585                         ptr ++;
586                 }
587
588                 part_start = NULL;
589                 do {
590                         char *optr;
591
592                         optr = ptr;
593                         if (parse_MimeHeaders(SubMimeHeaders, &ptr, content_end) != 0)
594                                 break;
595                         if ((ptr - optr > 2) && (*(ptr - 2) == '\r')) {
596                                 crlf_in_use = 1;
597                         }
598                         
599                         part_start = ptr;
600                         
601                         next_boundary = FindNextContent(ptr, content_end, SubMimeHeaders, m);
602                         if ((next_boundary != NULL) && (next_boundary - part_start < 3)) {
603                                 FlushInterestingMimes(SubMimeHeaders);
604                                 continue;
605                         }
606
607                         if ( (part_start != NULL) && (next_boundary != NULL) ) {
608                                 part_end = next_boundary;
609                                 --part_end;             // omit the trailing LF
610                                 if (crlf_in_use) {
611                                         --part_end;     // omit the trailing CR
612                                 }
613
614                                 if (!IsEmptyStr(partnum)) {
615                                         snprintf(nested_partnum,
616                                                  sizeof nested_partnum,
617                                                  "%s.%d", partnum,
618                                                  ++part_seq);
619                                 }
620                                 else {
621                                         snprintf(nested_partnum,
622                                                  sizeof nested_partnum,
623                                                  "%d", ++part_seq);
624                                 }
625                                 recurseable_mime_parser(nested_partnum,
626                                                         part_start, 
627                                                         part_end,
628                                                         CallBack,
629                                                         PreMultiPartCallBack,
630                                                         PostMultiPartCallBack,
631                                                         userdata,
632                                                         dont_decode, 
633                                                         SubMimeHeaders);
634                         }
635
636                         if (next_boundary != NULL) {
637                                 // If we pass out of scope, don't attempt to read past the end boundary.
638                                 if ((*(next_boundary + m->b[startary].len) == '-') && 
639                                     (*(next_boundary + m->b[startary].len + 1) == '-') ){
640                                         ptr = content_end;
641                                 }
642                                 else {
643                                         // Set up for the next part.
644                                         part_start = strstr(next_boundary, "\n");
645                                         
646                                         // Determine whether newlines are LF or CRLF
647                                         evaluate_crlf_ptr = part_start;
648                                         --evaluate_crlf_ptr;
649                                         if ((*evaluate_crlf_ptr == '\r') && (*(evaluate_crlf_ptr + 1) == '\n')) {
650                                                 crlf_in_use = 1;
651                                         }
652                                         else {
653                                                 crlf_in_use = 0;
654                                         }
655
656                                         // Advance past the LF ... now we're in the next part
657                                         ++part_start;
658                                         ptr = part_start;
659                                 }
660                         }
661                         else {
662                                 // Invalid end of multipart.  Bail out!
663                                 ptr = content_end;
664                         }
665                         FlushInterestingMimes(SubMimeHeaders);
666                 } while ( (ptr < content_end) && (next_boundary != NULL) );
667
668                 free(SubMimeHeaders);
669
670                 if (PostMultiPartCallBack != NULL) {
671                         PostMultiPartCallBack("", 
672                                               "", 
673                                               partnum, 
674                                               "", 
675                                               NULL,
676                                               m->b[content_type].Key, 
677                                               m->b[charset].Key,
678                                               0, 
679                                               m->b[encoding].Key, 
680                                               m->b[id].Key, 
681                                               userdata);
682                 }
683         }
684         // If it's not a multipart message, then do something with it
685         else {
686                 size_t length;
687                 part_start = ptr;
688                 length = content_end - part_start;
689                 ptr = part_end = content_end;
690
691                 /* The following code will truncate the MIME part to the size
692                  * specified by the Content-length: header.   We have commented it
693                  * out because these headers have a tendency to be wrong.
694                  *
695                  *      if ( (content_length > 0) && (length > content_length) ) {
696                  *              length = content_length;
697                  *      }
698                  */
699
700                 /* Sometimes the "name" field is tacked on to Content-type,
701                  * and sometimes it's tacked on to Content-disposition.  Use
702                  * whichever one we have.
703                  */
704                 if (m->b[content_disposition_name].len > m->b[content_type_name].len) {
705                         chosen_name = &m->b[content_disposition_name];
706                 }
707                 else {
708                         chosen_name = &m->b[content_type_name];
709                 }
710         
711                 // Ok, we've got a non-multipart part here, so do something with it.
712                 mime_decode(partnum,
713                             part_start, 
714                             length,
715                             m->b[content_type].Key, 
716                             m->b[charset].Key,
717                             m->b[encoding].Key, 
718                             m->b[disposition].Key, 
719                             m->b[id].Key, 
720                             chosen_name->Key, 
721                             m->b[filename].Key,
722                             CallBack, 
723                             NULL, NULL,
724                             userdata, 
725                             dont_decode
726                         );
727
728                 /*
729                  * Now if it's an encapsulated message/rfc822 then we have to recurse into it
730                  */
731                 if (!strcasecmp(&m->b[content_type].Key[0], "message/rfc822")) {
732
733                         if (PreMultiPartCallBack != NULL) {
734                                 PreMultiPartCallBack("", 
735                                                      "", 
736                                                      partnum, 
737                                                      "",
738                                                      NULL, 
739                                                      m->b[content_type].Key, 
740                                                      m->b[charset].Key,
741                                                      0, 
742                                                      m->b[encoding].Key, 
743                                                      m->b[id].Key, 
744                                                      userdata);
745                         }
746                         if (CallBack != NULL) {
747                                 if (strlen(partnum) > 0) {
748                                         snprintf(nested_partnum,
749                                                  sizeof nested_partnum,
750                                                  "%s.%d", partnum,
751                                                  ++part_seq);
752                                 }
753                                 else {
754                                         snprintf(nested_partnum,
755                                                  sizeof nested_partnum,
756                                                  "%d", ++part_seq);
757                                 }
758                                 the_mime_parser(nested_partnum,
759                                                 part_start, 
760                                                 part_end,
761                                                 CallBack,
762                                                 PreMultiPartCallBack,
763                                                 PostMultiPartCallBack,
764                                                 userdata,
765                                                 dont_decode
766                                         );
767                         }
768                         if (PostMultiPartCallBack != NULL) {
769                                 PostMultiPartCallBack("", 
770                                                       "", 
771                                                       partnum, 
772                                                       "", 
773                                                       NULL,
774                                                       m->b[content_type].Key, 
775                                                       m->b[charset].Key,
776                                                       0, 
777                                                       m->b[encoding].Key, 
778                                                       m->b[id].Key, 
779                                                       userdata);
780                         }
781
782
783                 }
784
785         }
786
787 }
788
789
790 // Break out the components of a multipart message
791 // (This function expects to be fed HEADERS + CONTENT)
792 // Note: NULL can be supplied as content_end; in this case, the message is
793 // considered to have ended when the parser encounters a 0x00 byte.
794 void the_mime_parser(char *partnum,
795                      char *content_start, char *content_end,
796                      MimeParserCallBackType CallBack,
797                      MimeParserCallBackType PreMultiPartCallBack,
798                      MimeParserCallBackType PostMultiPartCallBack,
799                      void *userdata,
800                      int dont_decode)
801 {
802         interesting_mime_headers *m;
803
804         // If the caller didn't supply an endpointer, generate one by measure
805         if (content_end == NULL) {
806                 content_end = &content_start[strlen(content_start)];
807         }
808
809         m = InitInterestingMimes();
810
811         if (!parse_MimeHeaders(m, &content_start, content_end)) {
812
813                 recurseable_mime_parser(partnum,
814                                         content_start, content_end,
815                                         CallBack,
816                                         PreMultiPartCallBack,
817                                         PostMultiPartCallBack,
818                                         userdata,
819                                         dont_decode,
820                                         m);
821         }
822         free(m);
823 }
824
825
826 // Entry point for the MIME parser.
827 // (This function expects to be fed HEADERS + CONTENT)
828 // Note: NULL can be supplied as content_end; in this case, the message is
829 // considered to have ended when the parser encounters a 0x00 byte.
830 void mime_parser(char *content_start,
831                  char *content_end,
832                  MimeParserCallBackType CallBack,
833                  MimeParserCallBackType PreMultiPartCallBack,
834                  MimeParserCallBackType PostMultiPartCallBack,
835                  void *userdata,
836                  int dont_decode)
837 {
838         the_mime_parser("", content_start, content_end,
839                         CallBack,
840                         PreMultiPartCallBack,
841                         PostMultiPartCallBack,
842                         userdata, dont_decode);
843 }
844
845
// One entry of the built-in "magic bytes" table used by GuessMimeType().
typedef struct _MimeGuess MimeGuess;

struct _MimeGuess {
        const char *Pattern;            // signature bytes to look for
        size_t PatternLen;              // number of bytes in Pattern; 0 marks the end of the table
        long PatternOffset;             // position in the data at which Pattern is compared
        const char *MimeString;         // MIME type reported when the signature matches
};
852
853 MimeGuess MyMimes [] = {
854         {
855                 "GIF",
856                 3,
857                 0,
858                 "image/gif"
859         },
860         {
861                 "\xff\xd8",
862                 2,
863                 0,
864                 "image/jpeg"
865         },
866         {
867                 "\x89PNG",
868                 4,
869                 0,
870                 "image/png"
871         },
872         { // last...
873                 "",
874                 0,
875                 0,
876                 ""
877         }
878 };
879
880
881 const char *GuessMimeType(const char *data, size_t dlen) {
882         int MimeIndex = 0;
883
884         while (MyMimes[MimeIndex].PatternLen != 0) {
885                 if ((MyMimes[MimeIndex].PatternLen + 
886                      MyMimes[MimeIndex].PatternOffset < dlen) &&
887                     strncmp(MyMimes[MimeIndex].Pattern, 
888                             &data[MyMimes[MimeIndex].PatternOffset], 
889                             MyMimes[MimeIndex].PatternLen) == 0)
890                 {
891                         return MyMimes[MimeIndex].MimeString;
892                 }
893                 MimeIndex ++;
894         }
895         /* 
896          * ok, our simple minded algorythm didn't find anything, 
897          * let the big chegger try it, he wil default to application/octet-stream
898          */
899         return (xdg_mime_get_mime_type_for_data(data, dlen));
900 }
901
902
// Guess a MIME type from a file name.
//
// what   the file name
// len    length of what, in bytes
//
// A small set of hardcoded extensions is compared case-insensitively against
// the end of the name, in table order; the first hit is returned.  Names that
// match none of them are passed on to xdgmime.
const char* GuessMimeByFilename(const char *what, size_t len) {
        static const struct {
                const char *ext;        // extension including the leading dot
                size_t extlen;          // strlen(ext)
                const char *mime;       // type returned on a match
        } known[] = {
                { ".gif",   4, "image/gif" },
                { ".js",    3, "text/javascript" },
                { ".txt",   4, "text/plain" },
                { ".css",   4, "text/css" },
                { ".htc",   4, "text/x-component" },
                { ".jpg",   4, "image/jpeg" },
                { ".jpeg",  5, "image/jpeg" },
                { ".png",   4, "image/png" },
                { ".ico",   4, "image/x-icon" },
                { ".vcf",   4, "text/x-vcard" },
                { ".html",  5, "text/html" },
                { ".htm",   4, "text/html" },
                { ".wml",   4, "text/vnd.wap.wml" },
                { ".wmls",  5, "text/vnd.wap.wmlscript" },
                { ".wmlc",  5, "application/vnd.wap.wmlc" },
                { ".wmlsc", 6, "application/vnd.wap.wmlscriptc" },
                { ".wbmp",  5, "image/vnd.wap.wbmp" }
        };
        size_t i;

        // we know some hardcoded on our own, try them...
        for (i = 0; i < sizeof known / sizeof known[0]; i++) {
                // The name must be at least as long as the extension itself.
                if ((len >= known[i].extlen) &&
                    !strncasecmp(&what[len - known[i].extlen], known[i].ext, known[i].extlen))
                {
                        return known[i].mime;
                }
        }

        // and let xdgmime do the fallback.
        return xdg_mime_get_mime_type_from_file_name(what);
}
943
944 static HashList *IconHash = NULL;
945
946 typedef struct IconName IconName;
947
948 struct IconName {
949         char *FlatName;
950         char *FileName;
951 };
952
953
954 static void DeleteIcon(void *IconNamePtr) {
955         IconName *Icon = (IconName*) IconNamePtr;
956         free(Icon->FlatName);
957         free(Icon->FileName);
958         free(Icon);
959 }
960
961
962 #define GENSTR "x-generic"
963 #define IGNORE_PREFIX_1 "gnome-mime"
964 int LoadIconDir(const char *DirName) {
965         DIR *filedir = NULL;
966         struct dirent *filedir_entry;
967         int d_namelen;
968         int d_without_ext;
969         IconName *Icon;
970
971         filedir = opendir (DirName);
972         IconHash = NewHash(1, NULL);
973         if (filedir == NULL) {
974                 return 0;
975         }
976
977         while ((filedir_entry = readdir(filedir))) {
978                 char *MinorPtr;
979                 char *PStart;
980 #ifdef _DIRENT_HAVE_D_NAMLEN
981                 d_namelen = filedir_entry->d_namlen;
982 #else
983                 d_namelen = strlen(filedir_entry->d_name);
984 #endif
985                 d_without_ext = d_namelen;
986                 while ((d_without_ext > 0) && (filedir_entry->d_name[d_without_ext] != '.'))
987                         d_without_ext --;
988                 if ((d_without_ext == 0) || (d_namelen < 3))
989                         continue;
990
991                 if ((sizeof(IGNORE_PREFIX_1) < d_namelen) &&
992                     (strncmp(IGNORE_PREFIX_1, 
993                              filedir_entry->d_name, 
994                              sizeof(IGNORE_PREFIX_1) - 1) == 0)) {
995                         PStart = filedir_entry->d_name + sizeof(IGNORE_PREFIX_1);
996                         d_without_ext -= sizeof(IGNORE_PREFIX_1);
997                 }
998                 else {
999                         PStart = filedir_entry->d_name;
1000                 }
1001                 Icon = malloc(sizeof(IconName));
1002
1003                 Icon->FileName = malloc(d_namelen + 1);
1004                 memcpy(Icon->FileName, filedir_entry->d_name, d_namelen + 1);
1005
1006                 Icon->FlatName = malloc(d_without_ext + 1);
1007                 memcpy(Icon->FlatName, PStart, d_without_ext);
1008                 Icon->FlatName[d_without_ext] = '\0';
1009                 // Try to find Minor type in image-jpeg
1010                 MinorPtr = strchr(Icon->FlatName, '-');
1011                 if (MinorPtr != NULL) {
1012                         size_t MinorLen;
1013                         MinorLen = 1 + d_without_ext - (MinorPtr - Icon->FlatName + 1);
1014                         if ((MinorLen == sizeof(GENSTR)) && 
1015                             (strncmp(MinorPtr + 1, GENSTR, sizeof(GENSTR)) == 0)) {
1016                                 // ok, we found a generic filename. cut the generic.
1017                                 *MinorPtr = '\0';
1018                                 d_without_ext = d_without_ext - (MinorPtr - Icon->FlatName);
1019                         }
1020                         else { // Map the major / minor separator to /
1021                                 *MinorPtr = '/';
1022                         }
1023                 }
1024
1025                 Put(IconHash, Icon->FlatName, d_without_ext, Icon, DeleteIcon);
1026         }
1027         closedir(filedir);
1028         return 1;
1029 }
1030
1031
1032 const char *GetIconFilename(char *MimeType, size_t len) {
1033         void *vIcon;
1034         IconName *Icon;
1035         
1036         if (IconHash == NULL) {
1037                 return NULL;
1038         }
1039
1040         GetHash(IconHash, MimeType, len, &vIcon), Icon = (IconName*) vIcon;
1041         // didn't find the exact mimetype? try major only.
1042         if (Icon == NULL) {
1043                 char * pMinor;
1044                 pMinor = strchr(MimeType, '/');
1045                 if (pMinor != NULL) {
1046                         *pMinor = '\0';
1047                         GetHash(IconHash, MimeType, pMinor - MimeType, &vIcon),
1048                                 Icon = (IconName*) vIcon;
1049                 }
1050         }
1051         if (Icon == NULL) {
1052                 return NULL;
1053         }
1054
1055         return Icon->FileName;
1056 }
1057
1058
1059 void ShutDownLibCitadelMime(void) {
1060         DeleteHash(&IconHash);
1061 }