performance improvements: replace memcmp
[citadel.git] / libcitadel / lib / mime_parser.c
1 /*
2  * This is the MIME parser for Citadel.
3  *
4  * Copyright (c) 1998-2010 by the citadel.org development team.
5  * This code is distributed under the GNU General Public License v3.
6  *
7  */
8
9 #include <stdlib.h>
10 #include <unistd.h>
11 #include <stdio.h>
12 #include <signal.h>
13 #include <sys/types.h>
14 #include <ctype.h>
15 #include <string.h>
16 #include <sys/stat.h>
17 #include <sys/types.h>
18 #include <dirent.h>
19 #include <errno.h>
20
21 #include "xdgmime/xdgmime.h"
22 #include "libcitadel.h"
23 #include "libcitadellocal.h"
24
/*
 * Lookup table translating a byte value into the hexadecimal digit it
 * represents.  '0'-'9' yield 0-9, 'A'-'F' and 'a'-'f' yield 10-15; every
 * other byte yields 0xFF, which marks "not a hex digit".
 * The table is laid out 16 entries per row, so the row comment is the
 * high nibble of the index.
 */
const unsigned char FromHexTable [256] = {
	/* 0x00 */ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
	/* 0x10 */ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
	/* 0x20 */ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
	/* 0x30 */ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
	/* 0x40 */ 0xFF, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
	/* 0x50 */ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
	/* 0x60 */ 0xFF, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
	/* 0x70 */ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
	/* 0x80 */ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
	/* 0x90 */ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
	/* 0xA0 */ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
	/* 0xB0 */ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
	/* 0xC0 */ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
	/* 0xD0 */ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
	/* 0xE0 */ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
	/* 0xF0 */ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
};
53
54
/*
 * Extract the value of a "key<KeyEnd>value" parameter from a header string.
 *
 * target     receives the NUL-terminated value; set to "" when the key is
 *            not found.  It must be large enough to hold the remainder of
 *            source that follows the key.
 * source     NUL-terminated text to search (e.g. a Content-type header).
 * sourcelen  number of bytes of source to search.
 * key/keylen the key to look for (matched via bmstrcasestr_len).
 * KeyEnd     the character that must follow the key, optionally preceded
 *            by whitespace (callers in this file pass '=').
 *
 * The value ends at the first ';' outside double quotes.  If the value
 * contains a double quote, the opening quote is removed and the value ends
 * at the closing quote.
 */
void extract_key(char *target, char *source, long sourcelen, char *key, long keylen, char KeyEnd)
{
	char *sptr = source;
	char *ptr = NULL;
	char *value = NULL;
	int double_quotes = 0;

	while (sptr != NULL)
	{
		/* Start from the bare key length for every candidate; whitespace
		 * skipped after a rejected candidate must not leak into the next. */
		long RealKeyLen = keylen;
		long remaining = sourcelen - (sptr - source);

		if (remaining < keylen) {
			break;		/* the key can no longer fit */
		}

		ptr = bmstrcasestr_len(sptr, remaining, key, keylen);
		if (ptr == NULL) {
			break;
		}

		while (isspace((unsigned char) ptr[RealKeyLen])) {
			RealKeyLen ++;
		}

		if (ptr[RealKeyLen] == KeyEnd) {
			value = ptr + RealKeyLen + 1;
			break;
		}
		if (ptr[RealKeyLen] == '\0') {
			break;		/* hit end of string; never step past the terminator */
		}

		/* Not followed by KeyEnd: keep looking after this occurrence. */
		sptr = ptr + RealKeyLen + 1;
	}

	if (value == NULL) {
		*target = '\0';
		return;
	}
	strcpy(target, value);

	ptr = target;
	while (*ptr != '\0') {

		/* if we find double quotes, we've got a great set of string boundaries */
		if (*ptr == '\"') {
			++double_quotes;
			if (double_quotes == 1) {
				/* Drop the opening quote.  The regions overlap, so this
				 * must be memmove, not strcpy.  Re-examine the character
				 * that slid into this position (it may be the closing
				 * quote of an empty value). */
				memmove(ptr, ptr + 1, strlen(ptr + 1) + 1);
				continue;
			}
			*ptr = '\0';	/* closing quote ends the value */
			break;
		}

		/* A semicolon means we've hit the end of the key, unless we're inside double quotes */
		if ((double_quotes != 1) && (*ptr == ';')) {
			*ptr = '\0';
			break;
		}

		ptr++;
	}
}
110
111
/*
 * Callbacks for non-multipart messages still expect a part number, so
 * substitute "1" whenever the caller has none to offer.
 *
 * Returns supplied_partnum itself when it is a non-empty string, otherwise
 * a pointer to the constant string "1" (which must not be modified).
 */
char *fixed_partnum(char *supplied_partnum) {
	int have_partnum = (supplied_partnum != NULL) && (supplied_partnum[0] != '\0');

	return have_partnum ? supplied_partnum : "1";
}
121
122
123 static inline unsigned int _decode_hex(const char *Source)
124 {
125         int ret = '?';
126         unsigned char LO_NIBBLE;
127         unsigned char HI_NIBBLE;
128
129         HI_NIBBLE = FromHexTable[(unsigned char) *Source];
130         LO_NIBBLE = FromHexTable[(unsigned char) *(Source+1)];
131         
132         if ((LO_NIBBLE == 0xFF) || (LO_NIBBLE == 0xFF))
133                 return ret;
134         ret = HI_NIBBLE;
135         ret = ret << 4;
136         ret = ret | LO_NIBBLE;
137         return ret;
138 }
139
/*
 * Externally visible wrapper around the file-local inline _decode_hex():
 * decodes the two hex digits starting at Source and returns its result.
 */
unsigned int decode_hex(char *Source)
{
	return _decode_hex(Source);
}
141
/*
 * Convert "quoted-printable" to binary, according to RFC2045 section 6.7.
 *
 * decoded    output buffer; must have room for sourcelen + 1 bytes (the
 *            output is never longer than the input, plus a terminating 0).
 * encoded    input data.
 * sourcelen  number of input bytes; nothing at or beyond
 *            encoded[sourcelen] is examined.
 *
 * "=\n" and "=\r\n" (soft line breaks) produce no output, "=XY" produces
 * the byte with hex value XY ('?' when XY is not valid hex).  An escape
 * cut off by the end of the input is handled without reading further:
 * a lone trailing '=' is dropped, and '=' followed by a single character
 * yields '?'.
 *
 * Returns the number of bytes decoded (not counting the terminating 0).
 */
int CtdlDecodeQuotedPrintable(char *decoded, char *encoded, int sourcelen) {
	int decoded_length = 0;
	int pos = 0;

	while (pos < sourcelen)
	{
		if (encoded[pos] != '=')
		{
			/* ordinary character: copy through */
			decoded[decoded_length++] = encoded[pos++];
			continue;
		}

		pos ++;		/* step over the '=' */
		if (pos >= sourcelen)
		{
			break;	/* dangling '=' at the very end of the input */
		}

		if (encoded[pos] == '\n')
		{
			pos ++;
		}
		else if (encoded[pos] == '\r')
		{
			pos ++;
			if ((pos < sourcelen) && (encoded[pos] == '\n'))
				pos ++;
		}
		else if (pos + 1 < sourcelen)
		{
			decoded[decoded_length++] = (char) _decode_hex(&encoded[pos]);
			pos += 2;
		}
		else
		{
			/* only one character left: the escape is truncated */
			decoded[decoded_length++] = '?';
			pos = sourcelen;
		}
	}
	decoded[decoded_length] = 0;
	return(decoded_length);
}
183
184
185 /*
186  * Given a message or message-part body and a length, handle any necessary
187  * decoding and pass the request up the stack.
188  */
189 void mime_decode(char *partnum,
190                  char *part_start, size_t length,
191                  char *content_type, char *charset, char *encoding,
192                  char *disposition,
193                  char *id,
194                  char *name, char *filename,
195                  MimeParserCallBackType CallBack,
196                  MimeParserCallBackType PreMultiPartCallBack,
197                  MimeParserCallBackType PostMultiPartCallBack,
198                  void *userdata,
199                  int dont_decode)
200 {
201
202         char *decoded;
203         size_t bytes_decoded = 0;
204
205         /* Some encodings aren't really encodings */
206         if (!strcasecmp(encoding, "7bit"))
207                 strcpy(encoding, "");
208         if (!strcasecmp(encoding, "8bit"))
209                 strcpy(encoding, "");
210         if (!strcasecmp(encoding, "binary"))
211                 strcpy(encoding, "");
212
213         /* If this part is not encoded, send as-is */
214         if ( (strlen(encoding) == 0) || (dont_decode)) {
215                 if (CallBack != NULL) {
216                         CallBack(name, 
217                                  filename, 
218                                  fixed_partnum(partnum),
219                                  disposition, 
220                                  part_start,
221                                  content_type, 
222                                  charset, 
223                                  length, 
224                                  encoding, 
225                                  id,
226                                  userdata);
227                         }
228                 return;
229         }
230         
231         /* Fail silently if we hit an unknown encoding. */
232         if ((strcasecmp(encoding, "base64"))
233             && (strcasecmp(encoding, "quoted-printable"))) {
234                 return;
235         }
236
237         /*
238          * Allocate a buffer for the decoded data.  The output buffer is slightly
239          * larger than the input buffer; this assumes that the decoded data
240          * will never be significantly larger than the encoded data.  This is a
241          * safe assumption with base64, uuencode, and quoted-printable.
242          */
243         decoded = malloc(length + 32768);
244         if (decoded == NULL) {
245                 return;
246         }
247
248         if (!strcasecmp(encoding, "base64")) {
249                 bytes_decoded = CtdlDecodeBase64(decoded, part_start, length);
250         }
251         else if (!strcasecmp(encoding, "quoted-printable")) {
252                 bytes_decoded = CtdlDecodeQuotedPrintable(decoded, part_start, length);
253         }
254
255         if (bytes_decoded > 0) if (CallBack != NULL) {
256                 CallBack(name, filename, fixed_partnum(partnum),
257                         disposition, decoded,
258                         content_type, charset, bytes_decoded, "binary", id, userdata);
259         }
260
261         free(decoded);
262 }
263
/*
 * This is the extract of mime_decode which can be called if 'dont_decode'
 * was set, to defer the CPU-intensive process of decoding until the
 * content is really wanted.
 *
 * part_start/length  the encoded body.
 * encoding           transfer encoding name; overwritten with "" when it is
 *                    "7bit", "8bit" or "binary".
 * decoded            out: newly allocated buffer holding the decoded data,
 *                    or NULL.
 * bytes_decoded      out: number of decoded bytes, or 0.
 *
 * returns:
 *   - > 0 we decoded something; it is in *decoded, and the caller must free it.
 *   - = 0 no need to decode anything. *decoded will be NULL.
 *   - < 0 an error occurred, either an unknown encoding or a failed
 *         allocation. *decoded will be NULL; nothing to free.
 */
int mime_decode_now (char *part_start, 
		     size_t length,
		     char *encoding,
		     char **decoded,
		     size_t *bytes_decoded)
{
	int is_base64;

	*bytes_decoded = 0;
	*decoded = NULL;

	/* Some encodings aren't really encodings */
	if (!strcasecmp(encoding, "7bit"))
		strcpy(encoding, "");
	if (!strcasecmp(encoding, "8bit"))
		strcpy(encoding, "");
	if (!strcasecmp(encoding, "binary"))
		strcpy(encoding, "");

	/* If this part is not encoded, there is nothing to do */
	if (strlen(encoding) == 0) {
		return 0;
	}

	/* Fail if we hit an unknown encoding. */
	is_base64 = !strcasecmp(encoding, "base64");
	if (!is_base64 && strcasecmp(encoding, "quoted-printable")) {
		return -1;
	}

	/*
	 * Allocate a buffer for the decoded data.  The output buffer is slightly
	 * larger than the input buffer; this assumes that the decoded data
	 * will never be significantly larger than the encoded data.  This is a
	 * safe assumption with base64 and quoted-printable.
	 */
	*decoded = malloc(length + 32768);
	if (*decoded == NULL) {		/* test the allocation, not the out-pointer */
		return -1;
	}

	if (is_base64) {
		*bytes_decoded = CtdlDecodeBase64(*decoded, part_start, length);
	}
	else {
		*bytes_decoded = CtdlDecodeQuotedPrintable(*decoded, part_start, length);
	}
	return 1;
}
321
322
323 /*
324  * Break out the components of a multipart message
325  * (This function expects to be fed HEADERS + CONTENT)
326  * Note: NULL can be supplied as content_end; in this case, the message is
327  * considered to have ended when the parser encounters a 0x00 byte.
328  */
329 void the_mime_parser(char *partnum,
330                      char *content_start, char *content_end,
331                      MimeParserCallBackType CallBack,
332                      MimeParserCallBackType PreMultiPartCallBack,
333                      MimeParserCallBackType PostMultiPartCallBack,
334                      void *userdata,
335                      int dont_decode)
336 {
337
338         char *ptr;
339         char *part_start, *part_end = NULL;
340         char buf[SIZ];
341         char *header;
342         char *boundary;
343         char *startary;
344         size_t startary_len = 0;
345         char *endary;
346         char *next_boundary;
347         char *content_type;
348         char *charset;
349         size_t content_type_len;
350         size_t content_length;
351         char *encoding;
352         char *disposition;
353         size_t disposition_len;
354         char *id;
355         char *name = NULL;
356         char *content_type_name;
357         char *content_disposition_name;
358         char *filename;
359         int is_multipart;
360         int part_seq = 0;
361         int i;
362         size_t length;
363         char nested_partnum[256];
364         int crlf_in_use = 0;
365         char *evaluate_crlf_ptr = NULL;
366         int buflen = 0;
367         int headerlen = 0;
368
369         ptr = content_start;
370         content_length = 0;
371
372         boundary = malloc(SIZ * 12);
373         *boundary = '\0';
374
375         startary = boundary + SIZ * 1;
376         *startary = '\0';
377
378         endary = boundary + SIZ * 2;
379         *endary = '\0';
380
381         header = boundary + SIZ * 3;
382         *header = '\0';
383
384         content_type = boundary + SIZ * 4;
385         *content_type = '\0';
386
387         charset = boundary + SIZ * 5;
388         *charset = '\0';
389
390         encoding = boundary + SIZ * 6;
391         *encoding = '\0';
392
393         content_type_name = boundary + SIZ * 7;
394         *content_type_name = '\0';
395
396         content_disposition_name = boundary + SIZ * 8;
397         *content_disposition_name = '\0';
398
399         filename = boundary + SIZ * 9;
400         *filename = '\0';
401
402         disposition = boundary + SIZ * 10;
403         *disposition = '\0';
404
405         id = boundary + SIZ * 11;
406         *id = '\0';
407
408         /* If the caller didn't supply an endpointer, generate one by measure */
409         if (content_end == NULL) {
410                 content_end = &content_start[strlen(content_start)];
411         }
412
413         /* Learn interesting things from the headers */
414         strcpy(header, "");
415         headerlen = 0;
416         do {
417                 ptr = memreadlinelen(ptr, buf, SIZ, &buflen);
418                 if (ptr >= content_end) {
419                         goto end_parser;
420                 }
421
422                 for (i = 0; i < buflen; ++i) {
423                         if (isspace(buf[i])) {
424                                 buf[i] = ' ';
425                         }
426                 }
427
428                 if (!isspace(buf[0])) {
429                         if (!strncasecmp(header, "Content-type:", 13)) {
430                                 memcpy (content_type, &header[13], headerlen - 12);
431                                 content_type_len = striplt (content_type);
432
433                                 extract_key(content_type_name, content_type, content_type_len, HKEY("name"), '=');
434                                 extract_key(charset,           content_type, content_type_len, HKEY("charset"), '=');
435                                 extract_key(boundary,          header,       headerlen,        HKEY("boundary"), '=');
436
437                                 /* Deal with weird headers */
438                                 if (strchr(content_type, ' '))
439                                         *(strchr(content_type, ' ')) = '\0';
440                                 if (strchr(content_type, ';'))
441                                         *(strchr(content_type, ';')) = '\0';
442                         }
443                         else if (!strncasecmp(header, "Content-Disposition:", 20)) {
444                                 memcpy (disposition, &header[20], headerlen - 19);
445                                 disposition_len = striplt(disposition);
446                                 extract_key(content_disposition_name, disposition, disposition_len,  HKEY("name"), '=');
447                                 extract_key(filename,                 disposition, disposition_len, HKEY("filename"), '=');
448                         }
449                         else if (!strncasecmp(header, "Content-ID:", 11)) {
450                                 strcpy(id, &header[11]);
451                                 striplt(id);
452                                 stripallbut(id, '<', '>');
453                         }
454                         else if (!strncasecmp(header, "Content-length: ", 15)) {
455                                 char clbuf[10];
456                                 safestrncpy(clbuf, &header[15], sizeof clbuf);
457                                 striplt(clbuf);
458                                 content_length = (size_t) atol(clbuf);
459                         }
460                         else if (!strncasecmp(header, "Content-transfer-encoding: ", 26)) {
461                                 strcpy(encoding, &header[26]);
462                                 striplt(encoding);
463                         }
464                         strcpy(header, "");
465                         headerlen = 0;
466                 }
467                 if ((headerlen + buflen + 2) < SIZ) {
468                         memcpy(&header[headerlen], buf, buflen);
469                         headerlen += buflen;
470                         header[headerlen] = '\0';
471                 }
472         } while ((!IsEmptyStr(buf)) && (*ptr != 0));
473
474         if (strchr(disposition, ';'))
475                 *(strchr(disposition, ';')) = '\0';
476         striplt(disposition);
477         if (strchr(content_type, ';'))
478                 *(strchr(content_type, ';')) = '\0';
479         striplt(content_type);
480
481         if (!IsEmptyStr(boundary)) {
482                 is_multipart = 1;
483         } else {
484                 is_multipart = 0;
485         }
486
487         /* If this is a multipart message, then recursively process it */
488         part_start = NULL;
489         if (is_multipart) {
490
491                 /* Tell the client about this message's multipartedness */
492                 if (PreMultiPartCallBack != NULL) {
493                         PreMultiPartCallBack("", "", partnum, "",
494                                 NULL, content_type, charset,
495                                 0, encoding, id, userdata);
496                 }
497
498                 /* Figure out where the boundaries are */
499                 startary_len = snprintf(startary, SIZ, "--%s", boundary);
500                 snprintf(endary, SIZ, "--%s--", boundary);
501
502                 part_start = NULL;
503                 do {
504                         char tmp;
505
506                         tmp = *content_end;
507                         *content_end = '\0';
508                         
509                         next_boundary = strstr(ptr, startary);
510                         *content_end = tmp;
511
512                         if ( (part_start != NULL) && (next_boundary != NULL) ) {
513                                 part_end = next_boundary;
514                                 --part_end;             /* omit the trailing LF */
515                                 if (crlf_in_use) {
516                                         --part_end;     /* omit the trailing CR */
517                                 }
518
519                                 if (!IsEmptyStr(partnum)) {
520                                         snprintf(nested_partnum,
521                                                  sizeof nested_partnum,
522                                                  "%s.%d", partnum,
523                                                  ++part_seq);
524                                 }
525                                 else {
526                                         snprintf(nested_partnum,
527                                                  sizeof nested_partnum,
528                                                  "%d", ++part_seq);
529                                 }
530                                 the_mime_parser(nested_partnum,
531                                             part_start, part_end,
532                                                 CallBack,
533                                                 PreMultiPartCallBack,
534                                                 PostMultiPartCallBack,
535                                                 userdata,
536                                                 dont_decode);
537                         }
538
539                         if (next_boundary != NULL) {
540                                 /* If we pass out of scope, don't attempt to
541                                  * read past the end boundary. */
542                                 if (!strcmp(next_boundary, endary)) {
543                                         ptr = content_end;
544                                 }
545                                 else {
546                                         /* Set up for the next part. */
547                                         part_start = strstr(next_boundary, "\n");
548                                         
549                                         /* Determine whether newlines are LF or CRLF */
550                                         evaluate_crlf_ptr = part_start;
551                                         --evaluate_crlf_ptr;
552                                         if ((*evaluate_crlf_ptr == '\r') && 
553                                             (*(evaluate_crlf_ptr + 1) == '\n'))
554                                         {
555                                                 crlf_in_use = 1;
556                                         }
557                                         else {
558                                                 crlf_in_use = 0;
559                                         }
560
561                                         /* Advance past the LF ... now we're in the next part */
562                                         ++part_start;
563                                         ptr = part_start;
564                                 }
565                         }
566                         else {
567                                 /* Invalid end of multipart.  Bail out! */
568                                 ptr = content_end;
569                         }
570                 } while ( (ptr < content_end) && (next_boundary != NULL) );
571
572                 if (PostMultiPartCallBack != NULL) {
573                         PostMultiPartCallBack("", "", partnum, "", NULL,
574                                 content_type, charset, 0, encoding, id, userdata);
575                 }
576                 goto end_parser;
577         }
578
579         /* If it's not a multipart message, then do something with it */
580         if (!is_multipart) {
581                 part_start = ptr;
582                 length = 0;
583                 while (ptr < content_end) {
584                         ++ptr;
585                         ++length;
586                 }
587                 part_end = content_end;
588
589
590                 /* The following code will truncate the MIME part to the size
591                  * specified by the Content-length: header.   We have commented it
592                  * out because these headers have a tendency to be wrong.
593                  *
594                  *      if ( (content_length > 0) && (length > content_length) ) {
595                  *              length = content_length;
596                  *      }
597                  */
598
599                 /* Sometimes the "name" field is tacked on to Content-type,
600                  * and sometimes it's tacked on to Content-disposition.  Use
601                  * whichever one we have.
602                  */
603                 if (strlen(content_disposition_name) > strlen(content_type_name)) {
604                         name = content_disposition_name;
605                 }
606                 else {
607                         name = content_type_name;
608                 }
609         
610                 /* Ok, we've got a non-multipart part here, so do something with it.
611                  */
612                 mime_decode(partnum,
613                         part_start, length,
614                         content_type, charset, encoding, disposition, id,
615                         name, filename,
616                         CallBack, NULL, NULL,
617                         userdata, dont_decode
618                 );
619
620                 /*
621                  * Now if it's an encapsulated message/rfc822 then we have to recurse into it
622                  */
623                 if (!strcasecmp(content_type, "message/rfc822")) {
624
625                         if (PreMultiPartCallBack != NULL) {
626                                 PreMultiPartCallBack("", "", partnum, "",
627                                         NULL, content_type, charset,
628                                         0, encoding, id, userdata);
629                         }
630                         if (CallBack != NULL) {
631                                 if (strlen(partnum) > 0) {
632                                         snprintf(nested_partnum,
633                                                  sizeof nested_partnum,
634                                                  "%s.%d", partnum,
635                                                  ++part_seq);
636                                 }
637                                 else {
638                                         snprintf(nested_partnum,
639                                                  sizeof nested_partnum,
640                                                  "%d", ++part_seq);
641                                 }
642                                 the_mime_parser(nested_partnum,
643                                         part_start, part_end,
644                                         CallBack,
645                                         PreMultiPartCallBack,
646                                         PostMultiPartCallBack,
647                                         userdata,
648                                         dont_decode
649                                 );
650                         }
651                         if (PostMultiPartCallBack != NULL) {
652                                 PostMultiPartCallBack("", "", partnum, "", NULL,
653                                         content_type, charset, 0, encoding, id, userdata);
654                         }
655
656
657                 }
658
659         }
660
661 end_parser:     /* free the buffers!  end the oppression!! */
662         free(boundary);
663 }
664
665
666
667 /*
668  * Entry point for the MIME parser.
669  * (This function expects to be fed HEADERS + CONTENT)
670  * Note: NULL can be supplied as content_end; in this case, the message is
671  * considered to have ended when the parser encounters a 0x00 byte.
672  */
673 void mime_parser(char *content_start,
674                  char *content_end,
675                  MimeParserCallBackType CallBack,
676                  MimeParserCallBackType PreMultiPartCallBack,
677                  MimeParserCallBackType PostMultiPartCallBack,
678                  void *userdata,
679                  int dont_decode)
680 {
681
682         the_mime_parser("", content_start, content_end,
683                         CallBack,
684                         PreMultiPartCallBack,
685                         PostMultiPartCallBack,
686                         userdata, dont_decode);
687 }
688
689
690
691
692
693
/*
 * One content signature: if the PatternLen bytes found PatternOffset bytes
 * into the data equal Pattern, the data is taken to be of type MimeString.
 */
typedef struct _MimeGuess {
	const char *Pattern;		/* signature bytes */
	size_t PatternLen;		/* number of signature bytes */
	long PatternOffset;		/* where in the data the signature sits */
	const char *MimeString;		/* resulting MIME type */
} MimeGuess;

/* Signatures known to GuessMimeType; an entry with PatternLen 0 ends the list. */
MimeGuess MyMimes [] = {
	{ .Pattern = "GIF",      .PatternLen = 3, .PatternOffset = 0, .MimeString = "image/gif"  },
	{ .Pattern = "\xff\xd8", .PatternLen = 2, .PatternOffset = 0, .MimeString = "image/jpeg" },
	{ .Pattern = "\x89PNG",  .PatternLen = 4, .PatternOffset = 0, .MimeString = "image/png"  },
	/* last... */
	{ .Pattern = "",         .PatternLen = 0, .PatternOffset = 0, .MimeString = ""           }
};
727
728
729 const char *GuessMimeType(const char *data, size_t dlen)
730 {
731         int MimeIndex = 0;
732
733         while (MyMimes[MimeIndex].PatternLen != 0)
734         {
735                 if ((MyMimes[MimeIndex].PatternLen + 
736                      MyMimes[MimeIndex].PatternOffset < dlen) &&
737                     strncmp(MyMimes[MimeIndex].Pattern, 
738                             &data[MyMimes[MimeIndex].PatternOffset], 
739                             MyMimes[MimeIndex].PatternLen) == 0)
740                 {
741                         return MyMimes[MimeIndex].MimeString;
742                 }
743                 MimeIndex ++;
744         }
745         /* 
746          * ok, our simple minded algorythm didn't find anything, 
747          * let the big chegger try it, he wil default to application/octet-stream
748          */
749         return (xdg_mime_get_mime_type_for_data(data, dlen));
750 }
751
752
/*
 * Guess a MIME type from a file name.
 *
 * what  the file name.
 * len   its length in bytes.
 *
 * A handful of extensions are resolved from a built-in list (compared
 * case-insensitively against the end of the name, first match wins);
 * everything else is left to xdg_mime_get_mime_type_from_file_name().
 */
const char* GuessMimeByFilename(const char *what, size_t len)
{
	/* we know some hardcoded on our own, try them... */
	static const struct {
		const char *Ext;
		size_t ExtLen;
		const char *Mime;
	} Known[] = {
		{ ".gif",   4, "image/gif" },
		{ ".js",    3, "text/javascript" },
		{ ".txt",   4, "text/plain" },
		{ ".css",   4, "text/css" },
		{ ".htc",   4, "text/x-component" },
		{ ".jpg",   4, "image/jpeg" },
		{ ".png",   4, "image/png" },
		{ ".ico",   4, "image/x-icon" },
		{ ".vcf",   4, "text/x-vcard" },
		{ ".html",  5, "text/html" },
		{ ".htm",   4, "text/html" },
		{ ".wml",   4, "text/vnd.wap.wml" },
		{ ".wmls",  5, "text/vnd.wap.wmlscript" },
		{ ".wmlc",  5, "application/vnd.wap.wmlc" },
		{ ".wmlsc", 6, "application/vnd.wap.wmlscriptc" },
		{ ".wbmp",  5, "image/vnd.wap.wbmp" }
	};
	size_t k;

	for (k = 0; k < sizeof Known / sizeof Known[0]; k++) {
		size_t ext_len = Known[k].ExtLen;

		if (len < ext_len)
			continue;	/* name is too short to end in this extension */
		if (!strncasecmp(&what[len - ext_len], Known[k].Ext, ext_len))
			return Known[k].Mime;
	}

	/* and let xdgmime do the fallback. */
	return xdg_mime_get_mime_type_from_file_name(what);
}
792
/*
 * Lookup table from flattened MIME type names to icon entries.
 * Created by LoadIconDir(), queried by GetIconFilename() and released by
 * ShutDownLibCitadelMime(); NULL until LoadIconDir() has been called.
 */
static HashList *IconHash = NULL;
794
/*
 * One icon file found by LoadIconDir().
 * Both strings are heap allocated and released by DeleteIcon().
 */
typedef struct IconName {
        char *FlatName;         /* key the icon is stored under in IconHash */
        char *FileName;         /* name of the icon file as read from the directory */
} IconName;
801
802 static void DeleteIcon(void *IconNamePtr)
803 {
804         IconName *Icon = (IconName*) IconNamePtr;
805         free(Icon->FlatName);
806         free(Icon->FileName);
807         free(Icon);
808 }
809
810 /*
811 static const char *PrintFlat(void *IconNamePtr)
812 {
813         IconName *Icon = (IconName*) IconNamePtr;
814         return Icon->FlatName;
815 }
816 static const char *PrintFile(void *IconNamePtr)
817 {
818         IconName *Icon = (IconName*) IconNamePtr;
819         return Icon->FileName;
820 }
821 */
822
823 #define GENSTR "x-generic"
824 #define IGNORE_PREFIX_1 "gnome-mime"
825 int LoadIconDir(const char *DirName)
826 {
827         DIR *filedir = NULL;
828         struct dirent *filedir_entry;
829         int d_namelen;
830         int d_without_ext;
831         IconName *Icon;
832
833         filedir = opendir (DirName);
834         IconHash = NewHash(1, NULL);
835         if (filedir == NULL) {
836                 return 0;
837         }
838
839         while ((filedir_entry = readdir(filedir)))
840         {
841                 char *MinorPtr;
842                 char *PStart;
843 #ifdef _DIRENT_HAVE_D_NAMELEN
844                 d_namelen = filedir_entry->d_namelen;
845 #else
846                 d_namelen = strlen(filedir_entry->d_name);
847 #endif
848                 d_without_ext = d_namelen;
849                 while ((d_without_ext > 0) && (filedir_entry->d_name[d_without_ext] != '.'))
850                         d_without_ext --;
851                 if ((d_without_ext == 0) || (d_namelen < 3))
852                         continue;
853
854                 if ((sizeof(IGNORE_PREFIX_1) < d_namelen) &&
855                     (strncmp(IGNORE_PREFIX_1, 
856                              filedir_entry->d_name, 
857                              sizeof(IGNORE_PREFIX_1) - 1) == 0)) {
858                         PStart = filedir_entry->d_name + sizeof(IGNORE_PREFIX_1);
859                         d_without_ext -= sizeof(IGNORE_PREFIX_1);
860                 }
861                 else {
862                         PStart = filedir_entry->d_name;
863                 }
864                 Icon = malloc(sizeof(IconName));
865
866                 Icon->FileName = malloc(d_namelen + 1);
867                 memcpy(Icon->FileName, filedir_entry->d_name, d_namelen + 1);
868
869                 Icon->FlatName = malloc(d_without_ext + 1);
870                 memcpy(Icon->FlatName, PStart, d_without_ext);
871                 Icon->FlatName[d_without_ext] = '\0';
872                 /* Try to find Minor type in image-jpeg */
873                 MinorPtr = strchr(Icon->FlatName, '-');
874                 if (MinorPtr != NULL) {
875                         size_t MinorLen;
876                         MinorLen = 1 + d_without_ext - (MinorPtr - Icon->FlatName + 1);
877                         if ((MinorLen == sizeof(GENSTR)) && 
878                             (strncmp(MinorPtr + 1, GENSTR, sizeof(GENSTR)) == 0)) {
879                                 /* ok, we found a generic filename. cut the generic. */
880                                 *MinorPtr = '\0';
881                                 d_without_ext = d_without_ext - (MinorPtr - Icon->FlatName);
882                         }
883                         else { /* Map the major / minor separator to / */
884                                 *MinorPtr = '/';
885                         }
886                 }
887
888 //              PrintHash(IconHash, PrintFlat, PrintFile);
889 //              printf("%s - %s\n", Icon->FlatName, Icon->FileName);
890                 Put(IconHash, Icon->FlatName, d_without_ext, Icon, DeleteIcon);
891 //              PrintHash(IconHash, PrintFlat, PrintFile);
892         }
893         closedir(filedir);
894         return 1;
895 }
896
897 const char *GetIconFilename(char *MimeType, size_t len)
898 {
899         void *vIcon;
900         IconName *Icon;
901         
902         if(IconHash == NULL)
903                 return NULL;
904
905         GetHash(IconHash, MimeType, len, &vIcon), Icon = (IconName*) vIcon;
906         /* didn't find the exact mimetype? try major only. */
907         if (Icon == NULL) {
908                 char * pMinor;
909                 pMinor = strchr(MimeType, '/');
910                 if (pMinor != NULL) {
911                         *pMinor = '\0';
912                         GetHash(IconHash, MimeType, pMinor - MimeType, &vIcon),
913                                 Icon = (IconName*) vIcon;
914                 }
915         }
916         if (Icon == NULL) {
917                 return NULL;
918         }
919
920         /*printf("Getting: [%s] == [%s] -> [%s]\n", MimeType, Icon->FlatName, Icon->FileName);*/
921         return Icon->FileName;
922 }
923
/*
 * Release the icon table built by LoadIconDir().
 * NOTE(review): DeleteHash() receives the address of IconHash and presumably
 * resets it to NULL, which GetIconFilename() checks for - confirm.
 */
void ShutDownLibCitadelMime(void)
{
        DeleteHash(&IconHash);
}