Be more conservative about when not to apply binary searching for the next boundary...
[citadel.git] / libcitadel / lib / mime_parser.c
1 /*
2  * This is the MIME parser for Citadel.
3  *
4  * Copyright (c) 1998-2010 by the citadel.org development team.
5  * This code is distributed under the GNU General Public License v3.
6  *
7  */
8
9 #include <stdlib.h>
10 #include <unistd.h>
11 #include <stdio.h>
12 #include <signal.h>
13 #include <sys/types.h>
14 #include <ctype.h>
15 #include <string.h>
16 #include <sys/stat.h>
17 #include <sys/types.h>
18 #include <dirent.h>
19 #include <errno.h>
20
21 #include "xdgmime/xdgmime.h"
22 #include "libcitadel.h"
23 #include "libcitadellocal.h"
24
/*
 * Lookup table mapping a byte value to the value of the hexadecimal digit
 * it represents ('0'-'9', 'A'-'F', 'a'-'f').  Every other byte maps to
 * 0xFF, which serves as the "not a hex digit" marker.
 * Laid out sixteen entries per row, so the row label is the high nibble
 * of the index.
 */
const unsigned char FromHexTable [256] = {
	/* 0x00 */ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
	/* 0x10 */ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
	/* 0x20 */ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
	/* 0x30 */ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
	/* 0x40 */ 0xFF, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
	/* 0x50 */ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
	/* 0x60 */ 0xFF, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
	/* 0x70 */ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
	/* 0x80 */ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
	/* 0x90 */ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
	/* 0xA0 */ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
	/* 0xB0 */ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
	/* 0xC0 */ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
	/* 0xD0 */ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
	/* 0xE0 */ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
	/* 0xF0 */ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
};
53
54
/*
 * Extract the value of a "key<KeyEnd>value" parameter from a header line.
 *
 * target    - receives the NUL-terminated value.  Everything following the
 *             key is first copied here with strcpy(), so it must be able to
 *             hold the remainder of source.
 * source    - NUL-terminated text to search.
 * sourcelen - length of source.
 * key       - parameter name to look for; located with bmstrcasestr_len()
 *             (presumably a case-insensitive search, judging by its name).
 * keylen    - length of key.
 * KeyEnd    - character that must follow the key, optionally preceded by
 *             whitespace (callers in this file pass '=').
 *
 * The value ends at the first ';' outside double quotes.  If the value is
 * quoted, the quotes are removed and the value ends at the closing quote.
 *
 * Returns the length of the string left in target, or 0 (with target set to
 * the empty string) when the key is not present.
 */
long extract_key(char *target, char *source, long sourcelen, char *key, long keylen, char KeyEnd)
{
	char *sptr = source;
	char *ptr = NULL;
	int double_quotes = 0;
	long RealKeyLen = keylen;

	if (keylen <= 0) {
		/* An empty key can never advance the search below. */
		*target = '\0';
		return 0;
	}

	for (;;) {
		long remaining = sourcelen - (sptr - source);

		if (remaining < keylen) {
			ptr = NULL;
			break;
		}
		ptr = bmstrcasestr_len(sptr, remaining, key, keylen);
		if (ptr == NULL) {
			break;
		}

		/* Measure afresh for every candidate: whitespace skipped after a
		 * rejected match must not leak into the next one. */
		RealKeyLen = keylen;
		while (isspace((unsigned char) ptr[RealKeyLen])) {
			RealKeyLen++;
		}
		if (ptr[RealKeyLen] == KeyEnd) {
			RealKeyLen++;		/* step over KeyEnd itself */
			break;
		}

		/* Not followed by KeyEnd; resume right after this occurrence so
		 * that no later occurrence is skipped and we never step past
		 * the end of source. */
		sptr = ptr + keylen;
	}

	if (ptr == NULL) {
		*target = '\0';
		return 0;
	}
	strcpy(target, ptr + RealKeyLen);

	ptr = target;
	while (*ptr != '\0') {
		/* A semicolon ends the value, unless we're inside double quotes */
		if ((double_quotes != 1) && (*ptr == ';')) {
			break;
		}

		if (*ptr == '\"') {
			++double_quotes;
			if (double_quotes == 1) {
				/* Drop the opening quote.  The regions overlap,
				 * so memmove() is required here. */
				memmove(ptr, ptr + 1, strlen(ptr + 1) + 1);
				/* Re-examine the character that moved into
				 * this position (it may be the closing quote). */
				continue;
			}
			/* Closing quote: the value ends here. */
			break;
		}
		ptr++;
	}
	*ptr = '\0';
	return ptr - target;
}
111
112
/*
 * Callback functions want a part number even for non-multipart messages.
 * When the caller has none (NULL or an empty string), substitute "1";
 * otherwise hand back the supplied string unchanged.
 */
char *fixed_partnum(char *supplied_partnum) {
	int missing = (supplied_partnum == NULL) || (supplied_partnum[0] == '\0');

	return missing ? "1" : supplied_partnum;
}
122
123
124 static inline unsigned int _decode_hex(const char *Source)
125 {
126         int ret = '?';
127         unsigned char LO_NIBBLE;
128         unsigned char HI_NIBBLE;
129
130         HI_NIBBLE = FromHexTable[(unsigned char) *Source];
131         LO_NIBBLE = FromHexTable[(unsigned char) *(Source+1)];
132         
133         if ((LO_NIBBLE == 0xFF) || (LO_NIBBLE == 0xFF))
134                 return ret;
135         ret = HI_NIBBLE;
136         ret = ret << 4;
137         ret = ret | LO_NIBBLE;
138         return ret;
139 }
140
/*
 * Externally visible entry point for the file-local _decode_hex():
 * passes Source straight through and returns its result.
 */
unsigned int decode_hex(char *Source)
{
	return _decode_hex(Source);
}
142
/*
 * Convert "quoted-printable" to binary, according to RFC2045 section 6.7.
 *
 * decoded   - output buffer; receives the decoded bytes followed by a NUL.
 *             The output is never longer than the input, so sourcelen + 1
 *             bytes are sufficient.
 * encoded   - input text.
 * sourcelen - number of input bytes; nothing at or beyond
 *             encoded[sourcelen] is read.
 *
 * "=" followed by LF, CR or CRLF is a soft line break and produces no
 * output.  "=" followed by two characters is decoded by _decode_hex().
 * A lone "=" at the very end of the input is treated as a soft line break
 * whose newline was cut off; an escape with only one character left
 * decodes to '?'.
 *
 * Returns the number of bytes decoded.
 */
int CtdlDecodeQuotedPrintable(char *decoded, char *encoded, int sourcelen) {
	int decoded_length = 0;
	int pos = 0;

	while (pos < sourcelen)
	{
		if (encoded[pos] != '=')
		{
			/* literal character */
			decoded[decoded_length++] = encoded[pos++];
			continue;
		}

		pos++;				/* step over the '=' */
		if (pos >= sourcelen)
		{
			break;			/* dangling '=' at end of input */
		}

		if (encoded[pos] == '\n')
		{
			pos++;
		}
		else if (encoded[pos] == '\r')
		{
			pos++;
			if ((pos < sourcelen) && (encoded[pos] == '\n'))
				pos++;
		}
		else if (pos + 1 < sourcelen)
		{
			decoded[decoded_length++] = (char) _decode_hex(&encoded[pos]);
			pos += 2;
		}
		else
		{
			/* Only one character left: the escape is truncated. */
			decoded[decoded_length++] = '?';
			pos = sourcelen;
		}
	}
	decoded[decoded_length] = 0;
	return(decoded_length);
}
184
185
186 /*
187  * Given a message or message-part body and a length, handle any necessary
188  * decoding and pass the request up the stack.
189  */
190 void mime_decode(char *partnum,
191                  char *part_start, size_t length,
192                  char *content_type, char *charset, char *encoding,
193                  char *disposition,
194                  char *id,
195                  char *name, char *filename,
196                  MimeParserCallBackType CallBack,
197                  MimeParserCallBackType PreMultiPartCallBack,
198                  MimeParserCallBackType PostMultiPartCallBack,
199                  void *userdata,
200                  int dont_decode)
201 {
202
203         char *decoded;
204         size_t bytes_decoded = 0;
205
206         /* Some encodings aren't really encodings */
207         if (!strcasecmp(encoding, "7bit"))
208                 strcpy(encoding, "");
209         if (!strcasecmp(encoding, "8bit"))
210                 strcpy(encoding, "");
211         if (!strcasecmp(encoding, "binary"))
212                 strcpy(encoding, "");
213
214         /* If this part is not encoded, send as-is */
215         if ( (strlen(encoding) == 0) || (dont_decode)) {
216                 if (CallBack != NULL) {
217                         CallBack(name, 
218                                  filename, 
219                                  fixed_partnum(partnum),
220                                  disposition, 
221                                  part_start,
222                                  content_type, 
223                                  charset, 
224                                  length, 
225                                  encoding, 
226                                  id,
227                                  userdata);
228                         }
229                 return;
230         }
231         
232         /* Fail silently if we hit an unknown encoding. */
233         if ((strcasecmp(encoding, "base64"))
234             && (strcasecmp(encoding, "quoted-printable"))) {
235                 return;
236         }
237
238         /*
239          * Allocate a buffer for the decoded data.  The output buffer is slightly
240          * larger than the input buffer; this assumes that the decoded data
241          * will never be significantly larger than the encoded data.  This is a
242          * safe assumption with base64, uuencode, and quoted-printable.
243          */
244         decoded = malloc(length + 32768);
245         if (decoded == NULL) {
246                 return;
247         }
248
249         if (!strcasecmp(encoding, "base64")) {
250                 bytes_decoded = CtdlDecodeBase64(decoded, part_start, length);
251         }
252         else if (!strcasecmp(encoding, "quoted-printable")) {
253                 bytes_decoded = CtdlDecodeQuotedPrintable(decoded, part_start, length);
254         }
255
256         if (bytes_decoded > 0) if (CallBack != NULL) {
257                         char encoding_buf[SIZ];
258
259                         strcpy(encoding_buf, "binary");
260                         CallBack(name, 
261                                  filename, 
262                                  fixed_partnum(partnum),
263                                  disposition, 
264                                  decoded,
265                                  content_type, 
266                                  charset, 
267                                  bytes_decoded, 
268                                  encoding_buf, 
269                                  id, 
270                                  userdata);
271         }
272
273         free(decoded);
274 }
275
/*
 * This is the decoding half of mime_decode(), for callers that set
 * 'dont_decode' there and want to postpone the CPU-intensive decoding
 * until the content is really needed.
 *
 * part_start    - encoded data
 * length        - number of encoded bytes
 * encoding      - transfer encoding name; overwritten with "" when it is
 *                 "7bit", "8bit" or "binary"
 * decoded       - receives a malloc()ed buffer holding the decoded data,
 *                 or NULL
 * bytes_decoded - receives the number of decoded bytes
 *
 * returns:
 *   - > 0 we decoded something; it is in *decoded and you need to free it.
 *   - = 0 no need to decode anything.  *decoded is NULL.
 *   - < 0 an error occurred: either the encoding is unknown or the
 *         allocation failed.  *decoded is NULL, nothing to free.
 */
int mime_decode_now (char *part_start, 
		     size_t length,
		     char *encoding,
		     char **decoded,
		     size_t *bytes_decoded)
{
	int is_base64;

	*bytes_decoded = 0;
	*decoded = NULL;

	/* Some encodings aren't really encodings */
	if (!strcasecmp(encoding, "7bit"))
		strcpy(encoding, "");
	if (!strcasecmp(encoding, "8bit"))
		strcpy(encoding, "");
	if (!strcasecmp(encoding, "binary"))
		strcpy(encoding, "");

	/* If this part is not encoded there is nothing to do */
	if (strlen(encoding) == 0) {
		return 0;
	}

	/* Fail if we hit an unknown encoding. */
	is_base64 = !strcasecmp(encoding, "base64");
	if (!is_base64 && strcasecmp(encoding, "quoted-printable")) {
		return -1;
	}

	/*
	 * The output buffer is 32768 bytes larger than the input, on the
	 * assumption that decoded data is never significantly larger than
	 * its encoded form.
	 */
	*decoded = malloc(length + 32768);
	if (*decoded == NULL) {
		/* Check the allocation itself, not the out-parameter. */
		return -1;
	}

	if (is_base64) {
		*bytes_decoded = CtdlDecodeBase64(*decoded, part_start, length);
	}
	else {
		*bytes_decoded = CtdlDecodeQuotedPrintable(*decoded, part_start, length);
	}
	return 1;
}
333
/*
 * Slot numbers for the per-part header values this parser keeps.
 * eMax is the number of slots and therefore has to remain the last entry.
 */
typedef enum _eIntMimeHdrs {
	boundary = 0,			/* "boundary" parameter */
	startary,			/* "--" followed by the boundary */
	endary,
	content_type,			/* Content-type value */
	charset,			/* "charset" parameter */
	encoding,			/* Content-transfer-encoding value */
	content_type_name,		/* "name" parameter of Content-type */
	content_disposition_name,	/* "name" parameter of Content-Disposition */
	filename,			/* "filename" parameter */
	disposition,			/* Content-Disposition value */
	id,				/* Content-ID value */
	eMax /* don't move ! */
} eIntMimeHdrs;
348
349 typedef struct _CBufStr {
350         char Key[SIZ];
351         long len;
352 }CBufStr;
353
354 typedef struct _interesting_mime_headers {
355         CBufStr b[eMax];
356         long content_length;
357         long is_multipart;
358 } interesting_mime_headers;
359
360
361 static void FlushInterestingMimes(interesting_mime_headers *m)
362 {
363         int i;
364         
365         for (i = 0; i < eMax; i++) {
366              m->b[i].Key[0] = '\0';
367              m->b[i].len = 0;
368         }
369         m->content_length = -1;
370 }
371 static interesting_mime_headers *InitInterestingMimes(void)
372 {
373         interesting_mime_headers *m;
374         m = (interesting_mime_headers*) malloc( sizeof(interesting_mime_headers));
375
376         FlushInterestingMimes(m);
377
378         return m;
379 }
380
381
382 static long parse_MimeHeaders(interesting_mime_headers *m, 
383                               char** pcontent_start, 
384                               char *content_end)
385 {
386         char buf[SIZ];
387         char header[SIZ];
388         long headerlen;
389         char *ptr, *pch;
390         int buflen = 0;
391         int i;
392
393         /* Learn interesting things from the headers */
394         ptr = *pcontent_start;
395         *header = '\0';
396         headerlen = 0;
397         do {
398                 ptr = memreadlinelen(ptr, buf, SIZ, &buflen);
399
400                 for (i = 0; i < buflen; ++i) {
401                         if (isspace(buf[i])) {
402                                 buf[i] = ' ';
403                         }
404                 }
405
406                 if (!isspace(buf[0]) && (headerlen > 0)) {
407                         if (!strncasecmp(header, "Content-type:", 13)) {
408                                 memcpy (m->b[content_type].Key, &header[13], headerlen - 12);
409                                 m->b[content_type].len = striplt (m->b[content_type].Key);
410
411                                 m->b[content_type_name].len = extract_key(m->b[content_type_name].Key, CKEY(m->b[content_type]), HKEY("name"), '=');
412                                 m->b[charset].len           = extract_key(m->b[charset].Key,           CKEY(m->b[content_type]), HKEY("charset"), '=');
413                                 m->b[boundary].len          = extract_key(m->b[boundary].Key,          header,       headerlen,  HKEY("boundary"), '=');
414
415                                 /* Deal with weird headers */
416                                 pch = strchr(m->b[content_type].Key, ' ');
417                                 if (pch != NULL) {
418                                         *pch = '\0';
419                                         m->b[content_type].len = m->b[content_type].Key - pch;
420                                 }
421                                 pch = strchr(m->b[content_type].Key, ';');
422                                 if (pch != NULL) {
423                                         *pch = '\0';
424                                         m->b[content_type].len = m->b[content_type].Key - pch;
425                                 }
426                         }
427                         else if (!strncasecmp(header, "Content-Disposition:", 20)) {
428                                 memcpy (m->b[disposition].Key, &header[20], headerlen - 19);
429                                 m->b[disposition].len = striplt(m->b[disposition].Key);
430
431                                 m->b[content_disposition_name].len = extract_key(m->b[content_disposition_name].Key, CKEY(m->b[disposition]), HKEY("name"), '=');
432                                 m->b[filename].len                 = extract_key(m->b[filename].Key,                 CKEY(m->b[disposition]), HKEY("filename"), '=');
433                                 pch = strchr(m->b[disposition].Key, ';');
434                                 if (pch != NULL) *pch = '\0';
435                                 m->b[disposition].len = striplt(m->b[disposition].Key);
436                         }
437                         else if (!strncasecmp(header, "Content-ID:", 11)) {
438                                 memcpy(m->b[id].Key, &header[11], headerlen);
439                                 striplt(m->b[id].Key);
440                                 m->b[id].len = stripallbut(m->b[id].Key, '<', '>');
441                         }
442                         else if (!strncasecmp(header, "Content-length: ", 15)) {
443                                 char *clbuf;
444                                 clbuf = &header[15];
445                                 while (isspace(*clbuf))
446                                         clbuf ++;
447                                 m->content_length = (size_t) atol(clbuf);
448                         }
449                         else if (!strncasecmp(header, "Content-transfer-encoding: ", 26)) {
450                                 memcpy(m->b[encoding].Key, &header[26], headerlen - 26);
451                                 m->b[encoding].len = striplt(m->b[encoding].Key);
452                         }
453                         *header = '\0';
454                         headerlen = 0;
455                 }
456                 if ((headerlen + buflen + 2) < SIZ) {
457                         memcpy(&header[headerlen], buf, buflen);
458                         headerlen += buflen;
459                         header[headerlen] = '\0';
460                 }
461                 if (ptr >= content_end) {
462                         return -1;
463                 }
464         } while ((!IsEmptyStr(buf)) && (*ptr != 0));
465
466         m->is_multipart = m->b[boundary].len != 0;
467         *pcontent_start = ptr;
468
469         return 0;
470 }
471
472
473 static int IsAsciiEncoding(interesting_mime_headers *m)
474 {
475
476         if ((m->b[encoding].len != 0) &&
477             (strcasecmp(m->b[encoding].Key, "base64") == 0))
478                 return 1;
479         if ((m->b[encoding].len != 0) &&
480             (strcmp(m->b[encoding].Key, "quoted-printable") == 0))
481                 return 1;
482
483         return 0;
484 }
485
486 static char *FindNextContent(char *ptr,
487                              char *content_end,
488                              interesting_mime_headers *SubMimeHeaders,
489                              interesting_mime_headers *m)
490 {
491         char *next_boundary;
492         char  tmp;
493
494         if (IsAsciiEncoding(SubMimeHeaders)) {
495                 tmp = *content_end;
496                 *content_end = '\0';
497
498                 /** 
499                  * ok, if we have a content length of the mime part, 
500                  * try skipping the content on the search for the next
501                  * boundary. since we don't trust the content_length
502                  * to be all accurate, and suspect it to lose one digit 
503                  * per line with a line length of 80 chars, we need 
504                  * to start searching a little before..
505                  */
506                                    
507                 if ((SubMimeHeaders->content_length != -1) &&
508                     (SubMimeHeaders->content_length > 10))
509                 {
510                         char *pptr;
511                         long lines;
512                                         
513                         lines = SubMimeHeaders->content_length / 80;
514                         pptr = ptr + SubMimeHeaders->content_length - lines - 10;
515                         if (pptr < content_end)
516                                 ptr = pptr;
517                 }
518                         
519                 next_boundary = strstr(ptr, m->b[startary].Key);
520                 *content_end = tmp;
521         }
522         else {
523                 char *srch;
524                 /** 
525                  * ok, if we have a content length of the mime part, 
526                  * try skipping the content on the search for the next
527                  * boundary. since we don't trust the content_length
528                  * to be all accurate, start searching a little before..
529                  */
530                                    
531                 if ((SubMimeHeaders->content_length != -1) &&
532                     (SubMimeHeaders->content_length > 10))
533                 {
534                         char *pptr;
535                         pptr = ptr + SubMimeHeaders->content_length - 10;
536                         if (pptr < content_end)
537                                 ptr = pptr;
538                 }
539                 next_boundary = NULL;
540                 for (srch=ptr; srch<content_end; ++srch) {
541                         if (!memcmp(srch, 
542                                     m->b[startary].Key, 
543                                     m->b[startary].len)) 
544                         {
545                                 next_boundary = srch;
546                                 srch = content_end;
547                         }
548                 }
549
550         }
551         return next_boundary;
552 }
553
554 /*
555  * Break out the components of a multipart message
556  * (This function expects to be fed HEADERS + CONTENT)
557  * Note: NULL can be supplied as content_end; in this case, the message is
558  * considered to have ended when the parser encounters a 0x00 byte.
559  */
560 static void recurseable_mime_parser(char *partnum,
561                                     char *content_start, char *content_end,
562                                     MimeParserCallBackType CallBack,
563                                     MimeParserCallBackType PreMultiPartCallBack,
564                                     MimeParserCallBackType PostMultiPartCallBack,
565                                     void *userdata,
566                                     int dont_decode, 
567                                     interesting_mime_headers *m)
568 {
569         interesting_mime_headers *SubMimeHeaders;
570         char     *ptr;
571         char     *part_start;
572         char     *part_end = NULL;
573         char     *evaluate_crlf_ptr = NULL;
574         char     *next_boundary;
575         char      nested_partnum[256];
576         int       crlf_in_use = 0;
577         int       part_seq = 0;
578         CBufStr  *chosen_name;
579
580
581         /* If this is a multipart message, then recursively process it */
582         ptr = content_start;
583         part_start = NULL;
584         if (m->is_multipart) {
585
586                 /* Tell the client about this message's multipartedness */
587                 if (PreMultiPartCallBack != NULL) {
588                         PreMultiPartCallBack("", 
589                                              "", 
590                                              partnum, 
591                                              "",
592                                              NULL, 
593                                              m->b[content_type].Key, 
594                                              m->b[charset].Key,
595                                              0, 
596                                              m->b[encoding].Key, 
597                                              m->b[id].Key, 
598                                              userdata);
599                 }
600
601                 /* Figure out where the boundaries are */
602                 m->b[startary].len = snprintf(m->b[startary].Key, SIZ, "--%s", m->b[boundary].Key);
603                 SubMimeHeaders = InitInterestingMimes ();
604                 if (*ptr == '\r')
605                         ptr ++;
606                 if (*ptr == '\n')
607                         ptr ++;
608                 if (strncmp(ptr, m->b[startary].Key, m->b[startary].len) == 0)
609                         ptr += m->b[startary].len;
610                 if (*ptr == '\r')
611                         ptr ++;
612                 if (*ptr == '\n')
613                         ptr ++;
614                 part_start = NULL;
615                 do {
616
617                         if (parse_MimeHeaders(SubMimeHeaders, &ptr, content_end) != 0)
618                                 break;
619                         part_start = ptr;
620                         
621                         next_boundary = FindNextContent(ptr,
622                                                         content_end,
623                                                         SubMimeHeaders,
624                                                         m);
625                         if ((next_boundary != NULL) && 
626                             (next_boundary - part_start < 3))
627                                 continue;
628
629                         if ( (part_start != NULL) && (next_boundary != NULL) ) {
630                                 part_end = next_boundary;
631                                 --part_end;             /* omit the trailing LF */
632                                 if (crlf_in_use) {
633                                         --part_end;     /* omit the trailing CR */
634                                 }
635
636                                 if (!IsEmptyStr(partnum)) {
637                                         snprintf(nested_partnum,
638                                                  sizeof nested_partnum,
639                                                  "%s.%d", partnum,
640                                                  ++part_seq);
641                                 }
642                                 else {
643                                         snprintf(nested_partnum,
644                                                  sizeof nested_partnum,
645                                                  "%d", ++part_seq);
646                                 }
647                                 recurseable_mime_parser(nested_partnum,
648                                                         part_start, 
649                                                         part_end,
650                                                         CallBack,
651                                                         PreMultiPartCallBack,
652                                                         PostMultiPartCallBack,
653                                                         userdata,
654                                                         dont_decode, 
655                                                         SubMimeHeaders);
656                         }
657
658                         if (next_boundary != NULL) {
659                                 /* If we pass out of scope, don't attempt to
660                                  * read past the end boundary. */
661                                 if ((*(next_boundary + m->b[startary].len + 1) == '-') && 
662                                     (*(next_boundary + m->b[startary].len + 2) == '-') ){
663                                         ptr = content_end;
664                                 }
665                                 else {
666                                         /* Set up for the next part. */
667                                         part_start = strstr(next_boundary, "\n");
668                                         
669                                         /* Determine whether newlines are LF or CRLF */
670                                         evaluate_crlf_ptr = part_start;
671                                         --evaluate_crlf_ptr;
672                                         if ((*evaluate_crlf_ptr == '\r') && 
673                                             (*(evaluate_crlf_ptr + 1) == '\n'))
674                                         {
675                                                 crlf_in_use = 1;
676                                         }
677                                         else {
678                                                 crlf_in_use = 0;
679                                         }
680
681                                         /* Advance past the LF ... now we're in the next part */
682                                         ++part_start;
683                                         ptr = part_start;
684                                 }
685                         }
686                         else {
687                                 /* Invalid end of multipart.  Bail out! */
688                                 ptr = content_end;
689                         }
690                         FlushInterestingMimes(SubMimeHeaders);
691                 } while ( (ptr < content_end) && (next_boundary != NULL) );
692
693                 free(SubMimeHeaders);
694
695                 if (PostMultiPartCallBack != NULL) {
696                         PostMultiPartCallBack("", 
697                                               "", 
698                                               partnum, 
699                                               "", 
700                                               NULL,
701                                               m->b[content_type].Key, 
702                                               m->b[charset].Key,
703                                               0, 
704                                               m->b[encoding].Key, 
705                                               m->b[id].Key, 
706                                               userdata);
707                 }
708         } /* If it's not a multipart message, then do something with it */
709         else {
710                 size_t length;
711                 part_start = ptr;
712                 length = content_end - part_start;
713                 ptr = part_end = content_end;
714
715
716                 /* The following code will truncate the MIME part to the size
717                  * specified by the Content-length: header.   We have commented it
718                  * out because these headers have a tendency to be wrong.
719                  *
720                  *      if ( (content_length > 0) && (length > content_length) ) {
721                  *              length = content_length;
722                  *      }
723                  */
724
725                 /* Sometimes the "name" field is tacked on to Content-type,
726                  * and sometimes it's tacked on to Content-disposition.  Use
727                  * whichever one we have.
728                  */
729                 if (m->b[content_disposition_name].len > m->b[content_type_name].len) {
730                         chosen_name = &m->b[content_disposition_name];
731                 }
732                 else {
733                         chosen_name = &m->b[content_type_name];
734                 }
735         
736                 /* Ok, we've got a non-multipart part here, so do something with it.
737                  */
738                 mime_decode(partnum,
739                             part_start, 
740                             length,
741                             m->b[content_type].Key, 
742                             m->b[charset].Key,
743                             m->b[encoding].Key, 
744                             m->b[disposition].Key, 
745                             m->b[id].Key, 
746                             chosen_name->Key, 
747                             m->b[filename].Key,
748                             CallBack, 
749                             NULL, NULL,
750                             userdata, 
751                             dont_decode
752                         );
753
754                 /*
755                  * Now if it's an encapsulated message/rfc822 then we have to recurse into it
756                  */
757                 if (!strcasecmp(&m->b[content_type].Key[0], "message/rfc822")) {
758
759                         if (PreMultiPartCallBack != NULL) {
760                                 PreMultiPartCallBack("", 
761                                                      "", 
762                                                      partnum, 
763                                                      "",
764                                                      NULL, 
765                                                      m->b[content_type].Key, 
766                                                      m->b[charset].Key,
767                                                      0, 
768                                                      m->b[encoding].Key, 
769                                                      m->b[id].Key, 
770                                                      userdata);
771                         }
772                         if (CallBack != NULL) {
773                                 if (strlen(partnum) > 0) {
774                                         snprintf(nested_partnum,
775                                                  sizeof nested_partnum,
776                                                  "%s.%d", partnum,
777                                                  ++part_seq);
778                                 }
779                                 else {
780                                         snprintf(nested_partnum,
781                                                  sizeof nested_partnum,
782                                                  "%d", ++part_seq);
783                                 }
784                                 the_mime_parser(nested_partnum,
785                                                 part_start, 
786                                                 part_end,
787                                                 CallBack,
788                                                 PreMultiPartCallBack,
789                                                 PostMultiPartCallBack,
790                                                 userdata,
791                                                 dont_decode
792                                         );
793                         }
794                         if (PostMultiPartCallBack != NULL) {
795                                 PostMultiPartCallBack("", 
796                                                       "", 
797                                                       partnum, 
798                                                       "", 
799                                                       NULL,
800                                                       m->b[content_type].Key, 
801                                                       m->b[charset].Key,
802                                                       0, 
803                                                       m->b[encoding].Key, 
804                                                       m->b[id].Key, 
805                                                       userdata);
806                         }
807
808
809                 }
810
811         }
812
813 }
814
815 /*
816  * Break out the components of a multipart message
817  * (This function expects to be fed HEADERS + CONTENT)
818  * Note: NULL can be supplied as content_end; in this case, the message is
819  * considered to have ended when the parser encounters a 0x00 byte.
820  */
821 void the_mime_parser(char *partnum,
822                      char *content_start, char *content_end,
823                      MimeParserCallBackType CallBack,
824                      MimeParserCallBackType PreMultiPartCallBack,
825                      MimeParserCallBackType PostMultiPartCallBack,
826                      void *userdata,
827                      int dont_decode)
828 {
829         interesting_mime_headers *m;
830
831         /* If the caller didn't supply an endpointer, generate one by measure */
832         if (content_end == NULL) {
833                 content_end = &content_start[strlen(content_start)];
834         }
835
836         m = InitInterestingMimes();
837
838         if (!parse_MimeHeaders(m, &content_start, content_end))
839         {
840
841                 recurseable_mime_parser(partnum,
842                                         content_start, content_end,
843                                         CallBack,
844                                         PreMultiPartCallBack,
845                                         PostMultiPartCallBack,
846                                         userdata,
847                                         dont_decode,
848                                         m);
849         }
850         free(m);
851 }
852
853 /*
854  * Entry point for the MIME parser.
855  * (This function expects to be fed HEADERS + CONTENT)
856  * Note: NULL can be supplied as content_end; in this case, the message is
857  * considered to have ended when the parser encounters a 0x00 byte.
858  */
859 void mime_parser(char *content_start,
860                  char *content_end,
861                  MimeParserCallBackType CallBack,
862                  MimeParserCallBackType PreMultiPartCallBack,
863                  MimeParserCallBackType PostMultiPartCallBack,
864                  void *userdata,
865                  int dont_decode)
866 {
867
868         the_mime_parser("", content_start, content_end,
869                         CallBack,
870                         PreMultiPartCallBack,
871                         PostMultiPartCallBack,
872                         userdata, dont_decode);
873 }
874
875
876
877
878
879
/*
 * One entry of the built-in content signature table used by GuessMimeType().
 */
typedef struct _MimeGuess {
        const char *Pattern;      /* signature bytes to compare against the data */
        size_t PatternLen;        /* number of bytes in Pattern; 0 ends the table */
        long PatternOffset;       /* index into the data where Pattern must start */
        const char *MimeString;   /* MIME type returned when the signature matches */
} MimeGuess;

/* Known signatures; the entry with PatternLen == 0 terminates the list. */
MimeGuess MyMimes [] = {
        /* Pattern      Len  Offset  MimeString   */
        { "GIF",        3,   0,      "image/gif"  },
        { "\xff\xd8",   2,   0,      "image/jpeg" },
        { "\x89PNG",    4,   0,      "image/png"  },
        { "",           0,   0,      ""           }   /* last... */
};
913
914
915 const char *GuessMimeType(const char *data, size_t dlen)
916 {
917         int MimeIndex = 0;
918
919         while (MyMimes[MimeIndex].PatternLen != 0)
920         {
921                 if ((MyMimes[MimeIndex].PatternLen + 
922                      MyMimes[MimeIndex].PatternOffset < dlen) &&
923                     strncmp(MyMimes[MimeIndex].Pattern, 
924                             &data[MyMimes[MimeIndex].PatternOffset], 
925                             MyMimes[MimeIndex].PatternLen) == 0)
926                 {
927                         return MyMimes[MimeIndex].MimeString;
928                 }
929                 MimeIndex ++;
930         }
931         /* 
932          * ok, our simple minded algorythm didn't find anything, 
933          * let the big chegger try it, he wil default to application/octet-stream
934          */
935         return (xdg_mime_get_mime_type_for_data(data, dlen));
936 }
937
938
/*
 * Guess a MIME type from the ending of a file name.
 *
 * what - the file name
 * len  - number of characters of what to consider
 *
 * A small hardcoded suffix list is compared case-insensitively against the
 * end of the name, in the order given below; the first hit is returned.
 * Names that match none of them are passed to
 * xdg_mime_get_mime_type_from_file_name().
 */
const char* GuessMimeByFilename(const char *what, size_t len)
{
        /* we know some hardcoded on our own, try them... */
        static const struct {
                const char *Suffix;
                size_t SuffixLen;
                const char *MimeString;
        } KnownSuffixes[] = {
                { ".gif",   4, "image/gif" },
                { ".js",    3, "text/javascript" },
                { ".txt",   4, "text/plain" },
                { ".css",   4, "text/css" },
                { ".htc",   4, "text/x-component" },
                { ".jpg",   4, "image/jpeg" },
                { ".png",   4, "image/png" },
                { ".ico",   4, "image/x-icon" },
                { ".vcf",   4, "text/x-vcard" },
                { ".html",  5, "text/html" },
                { ".htm",   4, "text/html" },
                { ".wml",   4, "text/vnd.wap.wml" },
                { ".wmls",  5, "text/vnd.wap.wmlscript" },
                { ".wmlc",  5, "application/vnd.wap.wmlc" },
                { ".wmlsc", 6, "application/vnd.wap.wmlscriptc" },
                { ".wbmp",  5, "image/vnd.wap.wbmp" }
        };
        size_t i;

        for (i = 0; i < sizeof(KnownSuffixes) / sizeof(KnownSuffixes[0]); i++) {
                size_t SuffixLen = KnownSuffixes[i].SuffixLen;

                /* the name has to be at least as long as the suffix itself */
                if (len < SuffixLen) {
                        continue;
                }
                if (strncasecmp(&what[len - SuffixLen],
                                KnownSuffixes[i].Suffix,
                                SuffixLen) == 0) {
                        return KnownSuffixes[i].MimeString;
                }
        }

        /* and let xdgmime do the fallback. */
        return xdg_mime_get_mime_type_from_file_name(what);
}
978
979 static HashList *IconHash = NULL;
980
/*
 * An icon file found by LoadIconDir().  Both strings are heap-allocated
 * and owned by the entry.
 */
typedef struct IconName {
        char *FlatName;   /* lookup key derived from the file name */
        char *FileName;   /* file name as it was read from the directory */
} IconName;

/*
 * Destructor for an IconName (handed to Put() by LoadIconDir):
 * releases both strings and then the entry itself.
 */
static void DeleteIcon(void *IconNamePtr)
{
        IconName *Entry = IconNamePtr;

        free(Entry->FileName);
        free(Entry->FlatName);
        free(Entry);
}
995
996 /*
997 static const char *PrintFlat(void *IconNamePtr)
998 {
999         IconName *Icon = (IconName*) IconNamePtr;
1000         return Icon->FlatName;
1001 }
1002 static const char *PrintFile(void *IconNamePtr)
1003 {
1004         IconName *Icon = (IconName*) IconNamePtr;
1005         return Icon->FileName;
1006 }
1007 */
1008
1009 #define GENSTR "x-generic"
1010 #define IGNORE_PREFIX_1 "gnome-mime"
1011 int LoadIconDir(const char *DirName)
1012 {
1013         DIR *filedir = NULL;
1014         struct dirent *filedir_entry;
1015         int d_namelen;
1016         int d_without_ext;
1017         IconName *Icon;
1018
1019         filedir = opendir (DirName);
1020         IconHash = NewHash(1, NULL);
1021         if (filedir == NULL) {
1022                 return 0;
1023         }
1024
1025         while ((filedir_entry = readdir(filedir)))
1026         {
1027                 char *MinorPtr;
1028                 char *PStart;
1029 #ifdef _DIRENT_HAVE_D_NAMELEN
1030                 d_namelen = filedir_entry->d_namelen;
1031 #else
1032                 d_namelen = strlen(filedir_entry->d_name);
1033 #endif
1034                 d_without_ext = d_namelen;
1035                 while ((d_without_ext > 0) && (filedir_entry->d_name[d_without_ext] != '.'))
1036                         d_without_ext --;
1037                 if ((d_without_ext == 0) || (d_namelen < 3))
1038                         continue;
1039
1040                 if ((sizeof(IGNORE_PREFIX_1) < d_namelen) &&
1041                     (strncmp(IGNORE_PREFIX_1, 
1042                              filedir_entry->d_name, 
1043                              sizeof(IGNORE_PREFIX_1) - 1) == 0)) {
1044                         PStart = filedir_entry->d_name + sizeof(IGNORE_PREFIX_1);
1045                         d_without_ext -= sizeof(IGNORE_PREFIX_1);
1046                 }
1047                 else {
1048                         PStart = filedir_entry->d_name;
1049                 }
1050                 Icon = malloc(sizeof(IconName));
1051
1052                 Icon->FileName = malloc(d_namelen + 1);
1053                 memcpy(Icon->FileName, filedir_entry->d_name, d_namelen + 1);
1054
1055                 Icon->FlatName = malloc(d_without_ext + 1);
1056                 memcpy(Icon->FlatName, PStart, d_without_ext);
1057                 Icon->FlatName[d_without_ext] = '\0';
1058                 /* Try to find Minor type in image-jpeg */
1059                 MinorPtr = strchr(Icon->FlatName, '-');
1060                 if (MinorPtr != NULL) {
1061                         size_t MinorLen;
1062                         MinorLen = 1 + d_without_ext - (MinorPtr - Icon->FlatName + 1);
1063                         if ((MinorLen == sizeof(GENSTR)) && 
1064                             (strncmp(MinorPtr + 1, GENSTR, sizeof(GENSTR)) == 0)) {
1065                                 /* ok, we found a generic filename. cut the generic. */
1066                                 *MinorPtr = '\0';
1067                                 d_without_ext = d_without_ext - (MinorPtr - Icon->FlatName);
1068                         }
1069                         else { /* Map the major / minor separator to / */
1070                                 *MinorPtr = '/';
1071                         }
1072                 }
1073
1074 //              PrintHash(IconHash, PrintFlat, PrintFile);
1075 //              printf("%s - %s\n", Icon->FlatName, Icon->FileName);
1076                 Put(IconHash, Icon->FlatName, d_without_ext, Icon, DeleteIcon);
1077 //              PrintHash(IconHash, PrintFlat, PrintFile);
1078         }
1079         closedir(filedir);
1080         return 1;
1081 }
1082
1083 const char *GetIconFilename(char *MimeType, size_t len)
1084 {
1085         void *vIcon;
1086         IconName *Icon;
1087         
1088         if(IconHash == NULL)
1089                 return NULL;
1090
1091         GetHash(IconHash, MimeType, len, &vIcon), Icon = (IconName*) vIcon;
1092         /* didn't find the exact mimetype? try major only. */
1093         if (Icon == NULL) {
1094                 char * pMinor;
1095                 pMinor = strchr(MimeType, '/');
1096                 if (pMinor != NULL) {
1097                         *pMinor = '\0';
1098                         GetHash(IconHash, MimeType, pMinor - MimeType, &vIcon),
1099                                 Icon = (IconName*) vIcon;
1100                 }
1101         }
1102         if (Icon == NULL) {
1103                 return NULL;
1104         }
1105
1106         /*printf("Getting: [%s] == [%s] -> [%s]\n", MimeType, Icon->FlatName, Icon->FileName);*/
1107         return Icon->FileName;
1108 }
1109
1110 void ShutDownLibCitadelMime(void)
1111 {
1112         DeleteHash(&IconHash);
1113 }