utf8ify_rfc822_string() is in libcitadel now
[citadel.git] / libcitadel / lib / mime_parser.c
1 // This is the MIME parser for Citadel.
2 //
3 // Copyright (c) 1998-2022 by the citadel.org development team.
4 //
5 // This program is open source software.  Use, duplication, or disclosure
6 // is subject to the terms of the GNU General Public License, version 3.
7
8 #include <stdlib.h>
9 #include <unistd.h>
10 #include <stdio.h>
11 #include <signal.h>
12 #include <sys/types.h>
13 #include <ctype.h>
14 #include <string.h>
15 #include <sys/stat.h>
16 #include <sys/types.h>
17 #include <dirent.h>
18 #include <errno.h>
19
20 #include "xdgmime/xdgmime.h"
21 #include "libcitadel.h"
22 #include "libcitadellocal.h"
23
// Maps a byte to the value of the hexadecimal digit it represents.
// '0'-'9' yield 0x00-0x09, 'A'-'F' and 'a'-'f' yield 0x0A-0x0F, and every
// other byte yields 0xFF, which the decoder below treats as "not a hex digit".
// Sixteen entries per row: the trailing comment is the index of the first entry.
const unsigned char FromHexTable [256] = {
	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x00
	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x10
	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x20
	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x30  '0'-'9'
	0xFF, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x40  'A'-'F'
	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x50
	0xFF, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x60  'a'-'f'
	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x70
	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x80
	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x90
	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0xA0
	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0xB0
	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0xC0
	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0xD0
	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0xE0
	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF  // 0xF0
};
52
53
/*
 * Pull the value of a "key=value" style parameter out of a header string.
 *
 * target     receives the NUL-terminated value.  The whole remainder of
 *            source after the key is copied here before it is trimmed, so
 *            the buffer must be able to hold that remainder.
 * source     NUL-terminated text to search
 * sourcelen  length of source
 * key        parameter name to look for (located with bmstrcasestr_len())
 * keylen     length of key
 * KeyEnd     character that has to follow the key, optionally preceded by
 *            whitespace (callers in this file pass '=')
 *
 * The value ends at the first semicolon outside double quotes, or at the
 * closing double quote.  The surrounding double quotes are not copied.
 *
 * Returns the length of the string left in target, or 0 (with target set to
 * the empty string) when the key was not found.
 */
long extract_key(char *target, char *source, long sourcelen, char *key, long keylen, char KeyEnd) {
	char *sptr = source;
	char *ptr;
	char *value = NULL;
	int in_quotes = 0;
	long RealKeyLen;

	while (sptr != NULL) {
		ptr = (char *) bmstrcasestr_len(sptr, sourcelen - (sptr - source), key, keylen);
		if (ptr == NULL) {
			break;
		}

		/* Start from the bare key length for every candidate match, then skip whitespace before the separator */
		RealKeyLen = keylen;
		while (isspace((unsigned char) ptr[RealKeyLen])) {
			RealKeyLen++;
		}

		if (ptr[RealKeyLen] == KeyEnd) {
			value = ptr + RealKeyLen + 1;
			break;
		}
		if (ptr[RealKeyLen] == '\0') {
			break;			/* ran into the end of the text; nothing left to search */
		}
		sptr = ptr + RealKeyLen + 1;
	}

	if (value == NULL) {
		*target = '\0';
		return 0;
	}
	strcpy(target, value);

	ptr = target;
	while (*ptr != '\0') {
		if (*ptr == '\"') {
			if (in_quotes) {
				break;		/* closing quote ends the value */
			}
			/* Opening quote: drop it and look again at the character that moved into its place */
			in_quotes = 1;
			memmove(ptr, ptr + 1, strlen(ptr + 1) + 1);
			continue;
		}

		/* A semicolon means we've hit the end of the value, unless we're inside double quotes */
		if ((*ptr == ';') && (!in_quotes)) {
			break;
		}
		ptr++;
	}
	*ptr = '\0';
	return ptr - target;
}
104
105
/*
 * A non-multipart message has no part number of its own, yet some callback
 * functions insist on receiving one.  When the caller supplied nothing (NULL
 * or an empty string) hand back "1"; otherwise pass the supplied part number
 * through untouched.
 */
char *fixed_partnum(char *supplied_partnum) {
	int have_partnum = (supplied_partnum != NULL) && (supplied_partnum[0] != '\0');

	return have_partnum ? supplied_partnum : "1";
}
115
116
117 static inline unsigned int _decode_hex(const char *Source) {
118         unsigned int ret = '?';
119         unsigned char LO_NIBBLE;
120         unsigned char HI_NIBBLE;
121
122         HI_NIBBLE = FromHexTable[(unsigned char) *Source];
123         LO_NIBBLE = FromHexTable[(unsigned char) *(Source+1)];
124         
125         if ((LO_NIBBLE == 0xFF) || (LO_NIBBLE == 0xFF))
126                 return ret;
127         ret = HI_NIBBLE;
128         ret = ret << 4;
129         ret = ret | LO_NIBBLE;
130         return ret;
131 }
132
// Externally visible wrapper: decodes the two hex digits at Source by handing them to _decode_hex().
unsigned int decode_hex(char *Source) {
	unsigned int value = _decode_hex(Source);

	return value;
}
134
/*
 * Convert "quoted-printable" to binary, according to RFC2045 section 6.7.
 *
 * decoded    output buffer; receives the decoded bytes followed by a NUL, so
 *            it must hold at least sourcelen + 1 bytes
 * encoded    quoted-printable input
 * sourcelen  number of input bytes to consume; nothing at or beyond
 *            encoded[sourcelen] is read
 *
 * "=" followed by LF, CR or CRLF is a soft line break and produces no output.
 * "=" followed by two characters is decoded as a hex escape ('?' is emitted
 * when they are not hex digits).  An escape cut short by the end of the input
 * produces nothing if only the "=" is present, or '?' if a single character
 * follows it.
 *
 * Returns the number of bytes decoded (not counting the trailing NUL).
 */
int CtdlDecodeQuotedPrintable(char *decoded, char *encoded, int sourcelen) {
	int decoded_length = 0;
	int pos = 0;

	while (pos < sourcelen) {
		if (encoded[pos] != '=') {
			decoded[decoded_length++] = encoded[pos++];
			continue;
		}

		pos++;					/* step over the '=' */
		if (pos >= sourcelen) {
			break;				/* input ends right after '=' */
		}

		if (encoded[pos] == '\n') {
			pos++;				/* soft line break, bare LF */
		}
		else if (encoded[pos] == '\r') {
			pos++;				/* soft line break, CR or CRLF */
			if ((pos < sourcelen) && (encoded[pos] == '\n')) {
				pos++;
			}
		}
		else if (pos + 1 < sourcelen) {
			decoded[decoded_length++] = (char) _decode_hex(&encoded[pos]);
			pos += 2;
		}
		else {
			/* Only one character is left, so the escape is incomplete */
			decoded[decoded_length++] = '?';
			pos = sourcelen;
		}
	}
	decoded[decoded_length] = 0;
	return(decoded_length);
}
169
170
171 /*
172  * Given a message or message-part body and a length, handle any necessary
173  * decoding and pass the request up the stack.
174  */
175 void mime_decode(char *partnum,
176                  char *part_start, size_t length,
177                  char *content_type, char *charset, char *encoding,
178                  char *disposition,
179                  char *id,
180                  char *name, char *filename,
181                  MimeParserCallBackType CallBack,
182                  MimeParserCallBackType PreMultiPartCallBack,
183                  MimeParserCallBackType PostMultiPartCallBack,
184                  void *userdata,
185                  int dont_decode
186 ) {
187         char *decoded;
188         size_t bytes_decoded = 0;
189
190         // Some encodings aren't really encodings
191         if (!strcasecmp(encoding, "7bit"))
192                 *encoding = '\0';
193         if (!strcasecmp(encoding, "8bit"))
194                 *encoding = '\0';
195         if (!strcasecmp(encoding, "binary"))
196                 *encoding = '\0';
197         if (!strcasecmp(encoding, "ISO-8859-1"))
198                 *encoding = '\0';
199
200         // If this part is not encoded, send as-is
201         if ( (strlen(encoding) == 0) || (dont_decode)) {
202                 if (CallBack != NULL) {
203                         CallBack(name, 
204                                  filename, 
205                                  fixed_partnum(partnum),
206                                  disposition, 
207                                  part_start,
208                                  content_type, 
209                                  charset, 
210                                  length, 
211                                  encoding, 
212                                  id,
213                                  userdata);
214                         }
215                 return;
216         }
217         
218         // Fail silently if we hit an unknown encoding.
219         if ((strcasecmp(encoding, "base64")) && (strcasecmp(encoding, "quoted-printable"))) {
220                 return;
221         }
222
223         // Allocate a buffer for the decoded data.  The output buffer is slightly
224         // larger than the input buffer; this assumes that the decoded data
225         // will never be significantly larger than the encoded data.  This is a
226         // safe assumption with base64, uuencode, and quoted-printable.
227         decoded = malloc(length + 32768);
228         if (decoded == NULL) {
229                 return;
230         }
231
232         if (!strcasecmp(encoding, "base64")) {
233                 bytes_decoded = CtdlDecodeBase64(decoded, part_start, length);
234         }
235         else if (!strcasecmp(encoding, "quoted-printable")) {
236                 bytes_decoded = CtdlDecodeQuotedPrintable(decoded, part_start, length);
237         }
238
239         if (bytes_decoded > 0) if (CallBack != NULL) {
240                         char encoding_buf[SIZ];
241
242                         strcpy(encoding_buf, "binary");
243                         CallBack(name, 
244                                  filename, 
245                                  fixed_partnum(partnum),
246                                  disposition, 
247                                  decoded,
248                                  content_type, 
249                                  charset, 
250                                  bytes_decoded, 
251                                  encoding_buf, 
252                                  id, 
253                                  userdata);
254         }
255
256         free(decoded);
257 }
258
/*
 * This is the decoding step of mime_decode(), for callers that set
 * 'dont_decode' there and want to postpone the CPU-intensive decoding until
 * the content is really needed.
 *
 * part_start     encoded data
 * length         number of bytes at part_start
 * encoding       transfer encoding; "7bit", "8bit" and "binary" are emptied in place
 * decoded        receives a newly allocated buffer holding the decoded data, or NULL
 * bytes_decoded  receives the number of decoded bytes
 *
 * returns:
 *   -  1  the part was decoded into *decoded; the caller must free() it.
 *   -  0  no decoding is needed; *decoded is NULL.
 *   - -1  an error occurred (unknown encoding, or the allocation failed);
 *         *decoded is NULL and there is nothing to free.
 */
int mime_decode_now (char *part_start, 
		     size_t length,
		     char *encoding,
		     char **decoded,
		     size_t *bytes_decoded)
{
	int is_base64;
	int is_quoted_printable;

	*bytes_decoded = 0;
	*decoded = NULL;

	/* Some encodings aren't really encodings */
	if (!strcasecmp(encoding, "7bit"))
		*encoding = '\0';
	if (!strcasecmp(encoding, "8bit"))
		*encoding = '\0';
	if (!strcasecmp(encoding, "binary"))
		*encoding = '\0';

	/* If this part is not encoded, there is nothing to do */
	if (*encoding == '\0') {
		return 0;
	}

	/* Fail if we hit an unknown encoding. */
	is_base64 = !strcasecmp(encoding, "base64");
	is_quoted_printable = !strcasecmp(encoding, "quoted-printable");
	if (!is_base64 && !is_quoted_printable) {
		return -1;
	}

	/*
	 * Allocate a buffer for the decoded data.  The output buffer is slightly
	 * larger than the input buffer; this assumes that the decoded data
	 * will never be significantly larger than the encoded data.
	 */
	*decoded = malloc(length + 32768);
	if (*decoded == NULL) {
		return -1;
	}

	if (is_base64) {
		*bytes_decoded = CtdlDecodeBase64(*decoded, part_start, length);
	}
	else {
		*bytes_decoded = CtdlDecodeQuotedPrintable(*decoded, part_start, length);
	}
	return 1;
}
316
/*
 * Indexes into the b[] array of interesting_mime_headers: one slot per piece
 * of header information the parser keeps.
 */
typedef enum _eIntMimeHdrs {
	boundary = 0,			/* boundary= parameter of Content-type */
	startary,			/* "--" followed by the boundary */
	endary,				/* not referenced in this part of the file; presumably the closing delimiter */
	content_type,			/* Content-type value */
	charset,			/* charset= parameter of Content-type */
	encoding,			/* Content-transfer-encoding value */
	content_type_name,		/* name= parameter of Content-type */
	content_disposition_name,	/* name= parameter of Content-Disposition */
	filename,			/* filename= parameter of Content-Disposition */
	disposition,			/* Content-Disposition value */
	id,				/* Content-ID value */
	eMax				/* number of slots; must stay last, don't move ! */
} eIntMimeHdrs;
331
332 typedef struct _CBufStr {
333         char Key[SIZ];
334         long len;
335 }CBufStr;
336
337 typedef struct _interesting_mime_headers {
338         CBufStr b[eMax];
339         long content_length;
340         long is_multipart;
341 } interesting_mime_headers;
342
343
344 static void FlushInterestingMimes(interesting_mime_headers *m)
345 {
346         int i;
347         
348         for (i = 0; i < eMax; i++) {
349              m->b[i].Key[0] = '\0';
350              m->b[i].len = 0;
351         }
352         m->content_length = -1;
353 }
354 static interesting_mime_headers *InitInterestingMimes(void)
355 {
356         interesting_mime_headers *m;
357         m = (interesting_mime_headers*) malloc( sizeof(interesting_mime_headers));
358
359         FlushInterestingMimes(m);
360
361         return m;
362 }
363
364
365 static long parse_MimeHeaders(interesting_mime_headers *m, 
366                               char** pcontent_start, 
367                               char *content_end)
368 {
369         char buf[SIZ];
370         char header[SIZ];
371         long headerlen;
372         char *ptr, *pch;
373         int buflen = 0;
374         int i;
375
376         /* Learn interesting things from the headers */
377         ptr = *pcontent_start;
378         *header = '\0';
379         headerlen = 0;
380         do {
381                 ptr = memreadlinelen(ptr, buf, SIZ, &buflen);
382
383                 for (i = 0; i < buflen; ++i) {
384                         if (isspace(buf[i])) {
385                                 buf[i] = ' ';
386                         }
387                 }
388
389                 if (!isspace(buf[0]) && (headerlen > 0)) {
390                         if (!strncasecmp(header, "Content-type:", 13)) {
391                                 memcpy (m->b[content_type].Key, &header[13], headerlen - 12);
392                                 m->b[content_type].Key[headerlen - 12] = '\0';
393                                 m->b[content_type].len = striplt (m->b[content_type].Key);
394
395                                 m->b[content_type_name].len = extract_key(m->b[content_type_name].Key, CKEY(m->b[content_type]), HKEY("name"), '=');
396                                 m->b[charset].len           = extract_key(m->b[charset].Key,           CKEY(m->b[content_type]), HKEY("charset"), '=');
397                                 m->b[boundary].len          = extract_key(m->b[boundary].Key,          header,       headerlen,  HKEY("boundary"), '=');
398
399                                 /* Deal with weird headers */
400                                 pch = strchr(m->b[content_type].Key, ' ');
401                                 if (pch != NULL) {
402                                         *pch = '\0';
403                                         m->b[content_type].len = m->b[content_type].Key - pch;
404                                 }
405                                 pch = strchr(m->b[content_type].Key, ';');
406                                 if (pch != NULL) {
407                                         *pch = '\0';
408                                         m->b[content_type].len = m->b[content_type].Key - pch;
409                                 }
410                         }
411                         else if (!strncasecmp(header, "Content-Disposition:", 20)) {
412                                 memcpy (m->b[disposition].Key, &header[20], headerlen - 19);
413                                 m->b[disposition].Key[headerlen - 19] = '\0';
414                                 m->b[disposition].len = striplt(m->b[disposition].Key);
415
416                                 m->b[content_disposition_name].len = extract_key(m->b[content_disposition_name].Key, CKEY(m->b[disposition]), HKEY("name"), '=');
417                                 m->b[filename].len                 = extract_key(m->b[filename].Key,                 CKEY(m->b[disposition]), HKEY("filename"), '=');
418                                 pch = strchr(m->b[disposition].Key, ';');
419                                 if (pch != NULL) *pch = '\0';
420                                 m->b[disposition].len = striplt(m->b[disposition].Key);
421                         }
422                         else if (!strncasecmp(header, "Content-ID:", 11)) {
423                                 memcpy(m->b[id].Key, &header[11], headerlen - 11);
424                                 m->b[id].Key[headerlen - 11] = '\0';
425                                 striplt(m->b[id].Key);
426                                 m->b[id].len = stripallbut(m->b[id].Key, '<', '>');
427                         }
428                         else if (!strncasecmp(header, "Content-length: ", 15)) {
429                                 char *clbuf;
430                                 clbuf = &header[15];
431                                 while (isspace(*clbuf))
432                                         clbuf ++;
433                                 m->content_length = (size_t) atol(clbuf);
434                         }
435                         else if (!strncasecmp(header, "Content-transfer-encoding: ", 26)) {
436                                 memcpy(m->b[encoding].Key, &header[26], headerlen - 26);
437                                 m->b[encoding].Key[headerlen - 26] = '\0';
438                                 m->b[encoding].len = striplt(m->b[encoding].Key);
439                         }
440                         *header = '\0';
441                         headerlen = 0;
442                 }
443                 if ((headerlen + buflen + 2) < SIZ) {
444                         memcpy(&header[headerlen], buf, buflen);
445                         headerlen += buflen;
446                         header[headerlen] = '\0';
447                 }
448                 if (ptr >= content_end) {
449                         return -1;
450                 }
451         } while ((!IsEmptyStr(buf)) && (*ptr != 0));
452
453         m->is_multipart = m->b[boundary].len != 0;
454         *pcontent_start = ptr;
455
456         return 0;
457 }
458
459
460 static int IsAsciiEncoding(interesting_mime_headers *m)
461 {
462
463         if ((m->b[encoding].len != 0) &&
464             (strcasecmp(m->b[encoding].Key, "base64") == 0))
465                 return 1;
466         if ((m->b[encoding].len != 0) &&
467             (strcmp(m->b[encoding].Key, "quoted-printable") == 0))
468                 return 1;
469
470         return 0;
471 }
472
473 static char *FindNextContent(char *ptr,
474                              char *content_end,
475                              interesting_mime_headers *SubMimeHeaders,
476                              interesting_mime_headers *m)
477 {
478         char *next_boundary;
479         char  tmp;
480
481         if (IsAsciiEncoding(SubMimeHeaders)) {
482                 tmp = *content_end;
483                 *content_end = '\0';
484
485                 /** 
486                  * ok, if we have a content length of the mime part, 
487                  * try skipping the content on the search for the next
488                  * boundary. since we don't trust the content_length
489                  * to be all accurate, and suspect it to lose one digit 
490                  * per line with a line length of 80 chars, we need 
491                  * to start searching a little before..
492                  */
493                                    
494                 if ((SubMimeHeaders->content_length != -1) &&
495                     (SubMimeHeaders->content_length > 10))
496                 {
497                         char *pptr;
498                         long lines;
499                                         
500                         lines = SubMimeHeaders->content_length / 80;
501                         pptr = ptr + SubMimeHeaders->content_length - lines - 10;
502                         if (pptr < content_end)
503                                 ptr = pptr;
504                 }
505                         
506                 next_boundary = strstr(ptr, m->b[startary].Key);
507                 *content_end = tmp;
508         }
509         else {
510                 char *srch;
511                 /** 
512                  * ok, if we have a content length of the mime part, 
513                  * try skipping the content on the search for the next
514                  * boundary. since we don't trust the content_length
515                  * to be all accurate, start searching a little before..
516                  */
517                                    
518                 if ((SubMimeHeaders->content_length != -1) &&
519                     (SubMimeHeaders->content_length > 10))
520                 {
521                         char *pptr;
522                         pptr = ptr + SubMimeHeaders->content_length - 10;
523                         if (pptr < content_end)
524                                 ptr = pptr;
525                 }
526                 
527
528                 srch = next_boundary = NULL;
529                 for (srch = memchr(ptr, '-',  content_end - ptr);
530                      (srch != NULL) && (srch < content_end); 
531                      srch = memchr(srch, '-',  content_end - srch)) 
532                 {
533                         if (!memcmp(srch, 
534                                     m->b[startary].Key, 
535                                     m->b[startary].len)) 
536                         {
537                                 next_boundary = srch;
538                                 srch = content_end;
539                         }
540                         else srch ++;
541
542                 }
543
544         }
545         return next_boundary;
546 }
547
548 /*
549  * Break out the components of a multipart message
550  * (This function expects to be fed HEADERS + CONTENT)
551  * Note: NULL can be supplied as content_end; in this case, the message is
552  * considered to have ended when the parser encounters a 0x00 byte.
553  */
554 static void recurseable_mime_parser(char *partnum,
555                                     char *content_start, char *content_end,
556                                     MimeParserCallBackType CallBack,
557                                     MimeParserCallBackType PreMultiPartCallBack,
558                                     MimeParserCallBackType PostMultiPartCallBack,
559                                     void *userdata,
560                                     int dont_decode, 
561                                     interesting_mime_headers *m)
562 {
563         interesting_mime_headers *SubMimeHeaders;
564         char     *ptr;
565         char     *part_start;
566         char     *part_end = NULL;
567         char     *evaluate_crlf_ptr = NULL;
568         char     *next_boundary;
569         char      nested_partnum[256];
570         int       crlf_in_use = 0;
571         int       part_seq = 0;
572         CBufStr  *chosen_name;
573
574
575         /* If this is a multipart message, then recursively process it */
576         ptr = content_start;
577         part_start = NULL;
578         if (m->is_multipart) {
579
580                 /* Tell the client about this message's multipartedness */
581                 if (PreMultiPartCallBack != NULL) {
582                         PreMultiPartCallBack("", 
583                                              "", 
584                                              partnum, 
585                                              "",
586                                              NULL, 
587                                              m->b[content_type].Key, 
588                                              m->b[charset].Key,
589                                              0, 
590                                              m->b[encoding].Key, 
591                                              m->b[id].Key, 
592                                              userdata);
593                 }
594
595                 /* Figure out where the boundaries are */
596                 m->b[startary].len = snprintf(m->b[startary].Key, SIZ, "--%s", m->b[boundary].Key);
597                 SubMimeHeaders = InitInterestingMimes ();
598
599                 while ((*ptr == '\r') || (*ptr == '\n')) ptr ++;
600
601                 if (strncmp(ptr, m->b[startary].Key, m->b[startary].len) == 0)
602                         ptr += m->b[startary].len;
603
604                 while ((*ptr == '\r') || (*ptr == '\n')) ptr ++;
605
606                 part_start = NULL;
607                 do {
608                         char *optr;
609
610                         optr = ptr;
611                         if (parse_MimeHeaders(SubMimeHeaders, &ptr, content_end) != 0)
612                                 break;
613                         if ((ptr - optr > 2) && (*(ptr - 2) == '\r')) {
614                                 crlf_in_use = 1;
615                         }
616                         
617                         part_start = ptr;
618                         
619                         next_boundary = FindNextContent(ptr, content_end, SubMimeHeaders, m);
620                         if ((next_boundary != NULL) && (next_boundary - part_start < 3)) {
621                                 FlushInterestingMimes(SubMimeHeaders);
622                                 continue;
623                         }
624
625                         if ( (part_start != NULL) && (next_boundary != NULL) ) {
626                                 part_end = next_boundary;
627                                 --part_end;             /* omit the trailing LF */
628                                 if (crlf_in_use) {
629                                         --part_end;     /* omit the trailing CR */
630                                 }
631
632                                 if (!IsEmptyStr(partnum)) {
633                                         snprintf(nested_partnum,
634                                                  sizeof nested_partnum,
635                                                  "%s.%d", partnum,
636                                                  ++part_seq);
637                                 }
638                                 else {
639                                         snprintf(nested_partnum,
640                                                  sizeof nested_partnum,
641                                                  "%d", ++part_seq);
642                                 }
643                                 recurseable_mime_parser(nested_partnum,
644                                                         part_start, 
645                                                         part_end,
646                                                         CallBack,
647                                                         PreMultiPartCallBack,
648                                                         PostMultiPartCallBack,
649                                                         userdata,
650                                                         dont_decode, 
651                                                         SubMimeHeaders);
652                         }
653
654                         if (next_boundary != NULL) {
655                                 /* If we pass out of scope, don't attempt to
656                                  * read past the end boundary. */
657                                 if ((*(next_boundary + m->b[startary].len) == '-') && 
658                                     (*(next_boundary + m->b[startary].len + 1) == '-') ){
659                                         ptr = content_end;
660                                 }
661                                 else {
662                                         /* Set up for the next part. */
663                                         part_start = strstr(next_boundary, "\n");
664                                         
665                                         /* Determine whether newlines are LF or CRLF */
666                                         evaluate_crlf_ptr = part_start;
667                                         --evaluate_crlf_ptr;
668                                         if ((*evaluate_crlf_ptr == '\r') && (*(evaluate_crlf_ptr + 1) == '\n')) {
669                                                 crlf_in_use = 1;
670                                         }
671                                         else {
672                                                 crlf_in_use = 0;
673                                         }
674
675                                         /* Advance past the LF ... now we're in the next part */
676                                         ++part_start;
677                                         ptr = part_start;
678                                 }
679                         }
680                         else {
681                                 /* Invalid end of multipart.  Bail out! */
682                                 ptr = content_end;
683                         }
684                         FlushInterestingMimes(SubMimeHeaders);
685                 } while ( (ptr < content_end) && (next_boundary != NULL) );
686
687                 free(SubMimeHeaders);
688
689                 if (PostMultiPartCallBack != NULL) {
690                         PostMultiPartCallBack("", 
691                                               "", 
692                                               partnum, 
693                                               "", 
694                                               NULL,
695                                               m->b[content_type].Key, 
696                                               m->b[charset].Key,
697                                               0, 
698                                               m->b[encoding].Key, 
699                                               m->b[id].Key, 
700                                               userdata);
701                 }
702         } /* If it's not a multipart message, then do something with it */
703         else {
704                 size_t length;
705                 part_start = ptr;
706                 length = content_end - part_start;
707                 ptr = part_end = content_end;
708
709                 /* The following code will truncate the MIME part to the size
710                  * specified by the Content-length: header.   We have commented it
711                  * out because these headers have a tendency to be wrong.
712                  *
713                  *      if ( (content_length > 0) && (length > content_length) ) {
714                  *              length = content_length;
715                  *      }
716                  */
717
718                 /* Sometimes the "name" field is tacked on to Content-type,
719                  * and sometimes it's tacked on to Content-disposition.  Use
720                  * whichever one we have.
721                  */
722                 if (m->b[content_disposition_name].len > m->b[content_type_name].len) {
723                         chosen_name = &m->b[content_disposition_name];
724                 }
725                 else {
726                         chosen_name = &m->b[content_type_name];
727                 }
728         
729                 // Ok, we've got a non-multipart part here, so do something with it.
730                 mime_decode(partnum,
731                             part_start, 
732                             length,
733                             m->b[content_type].Key, 
734                             m->b[charset].Key,
735                             m->b[encoding].Key, 
736                             m->b[disposition].Key, 
737                             m->b[id].Key, 
738                             chosen_name->Key, 
739                             m->b[filename].Key,
740                             CallBack, 
741                             NULL, NULL,
742                             userdata, 
743                             dont_decode
744                         );
745
746                 /*
747                  * Now if it's an encapsulated message/rfc822 then we have to recurse into it
748                  */
749                 if (!strcasecmp(&m->b[content_type].Key[0], "message/rfc822")) {
750
751                         if (PreMultiPartCallBack != NULL) {
752                                 PreMultiPartCallBack("", 
753                                                      "", 
754                                                      partnum, 
755                                                      "",
756                                                      NULL, 
757                                                      m->b[content_type].Key, 
758                                                      m->b[charset].Key,
759                                                      0, 
760                                                      m->b[encoding].Key, 
761                                                      m->b[id].Key, 
762                                                      userdata);
763                         }
764                         if (CallBack != NULL) {
765                                 if (strlen(partnum) > 0) {
766                                         snprintf(nested_partnum,
767                                                  sizeof nested_partnum,
768                                                  "%s.%d", partnum,
769                                                  ++part_seq);
770                                 }
771                                 else {
772                                         snprintf(nested_partnum,
773                                                  sizeof nested_partnum,
774                                                  "%d", ++part_seq);
775                                 }
776                                 the_mime_parser(nested_partnum,
777                                                 part_start, 
778                                                 part_end,
779                                                 CallBack,
780                                                 PreMultiPartCallBack,
781                                                 PostMultiPartCallBack,
782                                                 userdata,
783                                                 dont_decode
784                                         );
785                         }
786                         if (PostMultiPartCallBack != NULL) {
787                                 PostMultiPartCallBack("", 
788                                                       "", 
789                                                       partnum, 
790                                                       "", 
791                                                       NULL,
792                                                       m->b[content_type].Key, 
793                                                       m->b[charset].Key,
794                                                       0, 
795                                                       m->b[encoding].Key, 
796                                                       m->b[id].Key, 
797                                                       userdata);
798                         }
799
800
801                 }
802
803         }
804
805 }
806
807 /*
808  * Break out the components of a multipart message
809  * (This function expects to be fed HEADERS + CONTENT)
810  * Note: NULL can be supplied as content_end; in this case, the message is
811  * considered to have ended when the parser encounters a 0x00 byte.
812  */
813 void the_mime_parser(char *partnum,
814                      char *content_start, char *content_end,
815                      MimeParserCallBackType CallBack,
816                      MimeParserCallBackType PreMultiPartCallBack,
817                      MimeParserCallBackType PostMultiPartCallBack,
818                      void *userdata,
819                      int dont_decode)
820 {
821         interesting_mime_headers *m;
822
823         /* If the caller didn't supply an endpointer, generate one by measure */
824         if (content_end == NULL) {
825                 content_end = &content_start[strlen(content_start)];
826         }
827
828         m = InitInterestingMimes();
829
830         if (!parse_MimeHeaders(m, &content_start, content_end))
831         {
832
833                 recurseable_mime_parser(partnum,
834                                         content_start, content_end,
835                                         CallBack,
836                                         PreMultiPartCallBack,
837                                         PostMultiPartCallBack,
838                                         userdata,
839                                         dont_decode,
840                                         m);
841         }
842         free(m);
843 }
844
845
846 /*
847  * Entry point for the MIME parser.
848  * (This function expects to be fed HEADERS + CONTENT)
849  * Note: NULL can be supplied as content_end; in this case, the message is
850  * considered to have ended when the parser encounters a 0x00 byte.
851  */
852 void mime_parser(char *content_start,
853                  char *content_end,
854                  MimeParserCallBackType CallBack,
855                  MimeParserCallBackType PreMultiPartCallBack,
856                  MimeParserCallBackType PostMultiPartCallBack,
857                  void *userdata,
858                  int dont_decode)
859 {
860
861         the_mime_parser("", content_start, content_end,
862                         CallBack,
863                         PreMultiPartCallBack,
864                         PostMultiPartCallBack,
865                         userdata, dont_decode);
866 }
867
868
869
870
871
872
/*
 * One entry of the built-in "magic number" table: a byte signature, where
 * in the data it is expected, and the MIME type it identifies.
 */
typedef struct _MimeGuess {
	const char *Pattern;		/* signature bytes to look for */
	size_t PatternLen;		/* number of bytes in Pattern */
	long PatternOffset;		/* offset into the data where Pattern must appear */
	const char *MimeString;		/* MIME type reported on a match */
} MimeGuess;

/*
 * Signatures checked by GuessMimeType().  The table is terminated by an
 * entry whose PatternLen is 0.
 */
MimeGuess MyMimes [] = {
	{ .Pattern = "GIF",      .PatternLen = 3, .PatternOffset = 0, .MimeString = "image/gif"  },
	{ .Pattern = "\xff\xd8", .PatternLen = 2, .PatternOffset = 0, .MimeString = "image/jpeg" },
	{ .Pattern = "\x89PNG",  .PatternLen = 4, .PatternOffset = 0, .MimeString = "image/png"  },
	/* terminator -- must stay last */
	{ .Pattern = "",         .PatternLen = 0, .PatternOffset = 0, .MimeString = ""           }
};
906
907
908 const char *GuessMimeType(const char *data, size_t dlen)
909 {
910         int MimeIndex = 0;
911
912         while (MyMimes[MimeIndex].PatternLen != 0)
913         {
914                 if ((MyMimes[MimeIndex].PatternLen + 
915                      MyMimes[MimeIndex].PatternOffset < dlen) &&
916                     strncmp(MyMimes[MimeIndex].Pattern, 
917                             &data[MyMimes[MimeIndex].PatternOffset], 
918                             MyMimes[MimeIndex].PatternLen) == 0)
919                 {
920                         return MyMimes[MimeIndex].MimeString;
921                 }
922                 MimeIndex ++;
923         }
924         /* 
925          * ok, our simple minded algorythm didn't find anything, 
926          * let the big chegger try it, he wil default to application/octet-stream
927          */
928         return (xdg_mime_get_mime_type_for_data(data, dlen));
929 }
930
931
/*
 * Guess a MIME type from the extension of a file name.
 *
 * what   the file name
 * len    length of the file name in bytes
 *
 * A small table of hardcoded extensions is compared case-insensitively
 * against the end of the name; the first matching entry wins.  Names that
 * match none of them are handed to xdgmime.
 */
const char* GuessMimeByFilename(const char *what, size_t len)
{
	/* we know some hardcoded on our own, try them... */
	static const struct {
		const char *Suffix;
		size_t SuffixLen;
		const char *MimeString;
	} KnownSuffixes[] = {
		{ ".gif",   4, "image/gif" },
		{ ".js",    3, "text/javascript" },
		{ ".txt",   4, "text/plain" },
		{ ".css",   4, "text/css" },
		{ ".htc",   4, "text/x-component" },
		{ ".jpg",   4, "image/jpeg" },
		{ ".jpeg",  5, "image/jpeg" },
		{ ".png",   4, "image/png" },
		{ ".ico",   4, "image/x-icon" },
		{ ".vcf",   4, "text/x-vcard" },
		{ ".html",  5, "text/html" },
		{ ".htm",   4, "text/html" },
		{ ".wml",   4, "text/vnd.wap.wml" },
		{ ".wmls",  5, "text/vnd.wap.wmlscript" },
		{ ".wmlc",  5, "application/vnd.wap.wmlc" },
		{ ".wmlsc", 6, "application/vnd.wap.wmlscriptc" },
		{ ".wbmp",  5, "image/vnd.wap.wbmp" }
	};
	size_t i;

	for (i = 0; i < sizeof(KnownSuffixes) / sizeof(KnownSuffixes[0]); i++) {
		size_t SuffixLen = KnownSuffixes[i].SuffixLen;

		/* The name must be at least as long as the suffix it is tested for */
		if (len < SuffixLen) {
			continue;
		}
		if (strncasecmp(&what[len - SuffixLen], KnownSuffixes[i].Suffix, SuffixLen) == 0) {
			return KnownSuffixes[i].MimeString;
		}
	}

	/* and let xdgmime do the fallback. */
	return xdg_mime_get_mime_type_from_file_name(what);
}
973
974 static HashList *IconHash = NULL;
975
976 typedef struct IconName IconName;
977
978 struct IconName {
979         char *FlatName;
980         char *FileName;
981 };
982
983 static void DeleteIcon(void *IconNamePtr)
984 {
985         IconName *Icon = (IconName*) IconNamePtr;
986         free(Icon->FlatName);
987         free(Icon->FileName);
988         free(Icon);
989 }
990
991 /*
992 static const char *PrintFlat(void *IconNamePtr)
993 {
994         IconName *Icon = (IconName*) IconNamePtr;
995         return Icon->FlatName;
996 }
997 static const char *PrintFile(void *IconNamePtr)
998 {
999         IconName *Icon = (IconName*) IconNamePtr;
1000         return Icon->FileName;
1001 }
1002 */
1003
1004 #define GENSTR "x-generic"
1005 #define IGNORE_PREFIX_1 "gnome-mime"
1006 int LoadIconDir(const char *DirName)
1007 {
1008         DIR *filedir = NULL;
1009         struct dirent *filedir_entry;
1010         int d_namelen;
1011         int d_without_ext;
1012         IconName *Icon;
1013
1014         filedir = opendir (DirName);
1015         IconHash = NewHash(1, NULL);
1016         if (filedir == NULL) {
1017                 return 0;
1018         }
1019
1020         while ((filedir_entry = readdir(filedir)))
1021         {
1022                 char *MinorPtr;
1023                 char *PStart;
1024 #ifdef _DIRENT_HAVE_D_NAMLEN
1025                 d_namelen = filedir_entry->d_namlen;
1026 #else
1027                 d_namelen = strlen(filedir_entry->d_name);
1028 #endif
1029                 d_without_ext = d_namelen;
1030                 while ((d_without_ext > 0) && (filedir_entry->d_name[d_without_ext] != '.'))
1031                         d_without_ext --;
1032                 if ((d_without_ext == 0) || (d_namelen < 3))
1033                         continue;
1034
1035                 if ((sizeof(IGNORE_PREFIX_1) < d_namelen) &&
1036                     (strncmp(IGNORE_PREFIX_1, 
1037                              filedir_entry->d_name, 
1038                              sizeof(IGNORE_PREFIX_1) - 1) == 0)) {
1039                         PStart = filedir_entry->d_name + sizeof(IGNORE_PREFIX_1);
1040                         d_without_ext -= sizeof(IGNORE_PREFIX_1);
1041                 }
1042                 else {
1043                         PStart = filedir_entry->d_name;
1044                 }
1045                 Icon = malloc(sizeof(IconName));
1046
1047                 Icon->FileName = malloc(d_namelen + 1);
1048                 memcpy(Icon->FileName, filedir_entry->d_name, d_namelen + 1);
1049
1050                 Icon->FlatName = malloc(d_without_ext + 1);
1051                 memcpy(Icon->FlatName, PStart, d_without_ext);
1052                 Icon->FlatName[d_without_ext] = '\0';
1053                 /* Try to find Minor type in image-jpeg */
1054                 MinorPtr = strchr(Icon->FlatName, '-');
1055                 if (MinorPtr != NULL) {
1056                         size_t MinorLen;
1057                         MinorLen = 1 + d_without_ext - (MinorPtr - Icon->FlatName + 1);
1058                         if ((MinorLen == sizeof(GENSTR)) && 
1059                             (strncmp(MinorPtr + 1, GENSTR, sizeof(GENSTR)) == 0)) {
1060                                 /* ok, we found a generic filename. cut the generic. */
1061                                 *MinorPtr = '\0';
1062                                 d_without_ext = d_without_ext - (MinorPtr - Icon->FlatName);
1063                         }
1064                         else { /* Map the major / minor separator to / */
1065                                 *MinorPtr = '/';
1066                         }
1067                 }
1068
1069 //              PrintHash(IconHash, PrintFlat, PrintFile);
1070 //              printf("%s - %s\n", Icon->FlatName, Icon->FileName);
1071                 Put(IconHash, Icon->FlatName, d_without_ext, Icon, DeleteIcon);
1072 //              PrintHash(IconHash, PrintFlat, PrintFile);
1073         }
1074         closedir(filedir);
1075         return 1;
1076 }
1077
1078 const char *GetIconFilename(char *MimeType, size_t len)
1079 {
1080         void *vIcon;
1081         IconName *Icon;
1082         
1083         if(IconHash == NULL)
1084                 return NULL;
1085
1086         GetHash(IconHash, MimeType, len, &vIcon), Icon = (IconName*) vIcon;
1087         /* didn't find the exact mimetype? try major only. */
1088         if (Icon == NULL) {
1089                 char * pMinor;
1090                 pMinor = strchr(MimeType, '/');
1091                 if (pMinor != NULL) {
1092                         *pMinor = '\0';
1093                         GetHash(IconHash, MimeType, pMinor - MimeType, &vIcon),
1094                                 Icon = (IconName*) vIcon;
1095                 }
1096         }
1097         if (Icon == NULL) {
1098                 return NULL;
1099         }
1100
1101         /*printf("Getting: [%s] == [%s] -> [%s]\n", MimeType, Icon->FlatName, Icon->FileName);*/
1102         return Icon->FileName;
1103 }
1104
1105 void ShutDownLibCitadelMime(void)
1106 {
1107         DeleteHash(&IconHash);
1108 }