libcitadel/lib/mime_parser.c

   1 // This is the MIME parser for Citadel.
   2 //
   3 // Copyright (c) 1998-2023 by the citadel.org development team.
   4 //
   5 // This program is open source software.  Use, duplication, or disclosure
   6 // is subject to the terms of the GNU General Public License, version 3.
   7
   8 #include <stdlib.h>
   9 #include <unistd.h>
  10 #include <stdio.h>
  11 #include <signal.h>
  12 #include <sys/types.h>
  13 #include <ctype.h>
  14 #include <string.h>
  15 #include <sys/stat.h>
  16 #include <sys/types.h>
  17 #include <dirent.h>
  18 #include <errno.h>
  19
  20 #include "xdgmime/xdgmime.h"
  21 #include "libcitadel.h"
  22 #include "libcitadellocal.h"
  23
  24 const unsigned char FromHexTable[256] = {
  25         0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, //  0
  26         0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 10
  27         0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 20
  28         0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 30
  29         0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x01, // 40
  30         0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0xFF, 0xFF, // 50
  31         0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, // 60
  32         0x0F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 70
  33         0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 80
  34         0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0A, 0x0B, 0x0C, // 90
  35         0x0D, 0x0E, 0x0F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, //100
  36         0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, //110
  37         0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, //120
  38         0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, //130
  39         0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, //140
  40         0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, //150
  41         0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, //160
  42         0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, //170
  43         0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, //180
  44         0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, //190
  45         0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, //200
  46         0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, //210
  47         0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, //220
  48         0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, //230
  49         0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, //240
  50         0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF                          //250
  51 };
  52
  53
  54 long extract_key(char *target, char *source, long sourcelen, char *key, long keylen, char KeyEnd) {
  55         char *sptr, *ptr = NULL;
  56         int double_quotes = 0;
  57         long RealKeyLen = keylen;
  58
  59         sptr = source;
  60
  61         while (sptr != NULL) {
  62                 ptr = bmstrcasestr_len(sptr, sourcelen - (sptr - source), key, keylen);
  63                 if (ptr != NULL) {
  64                         while (isspace(*(ptr + RealKeyLen)))
  65                                 RealKeyLen ++;
  66                         if (*(ptr + RealKeyLen) == KeyEnd) {
  67                                 sptr = NULL;
  68                                 RealKeyLen ++;
  69                         }
  70                         else {
  71                                 sptr = ptr + RealKeyLen + 1;
  72                         }
  73                 }
  74                 else
  75                         sptr = ptr;
  76         }
  77         if (ptr == NULL) {
  78                 *target = '\0';
  79                 return 0;
  80         }
  81         strcpy(target, (ptr + RealKeyLen));
  82
  83         for (ptr=target; (*ptr != 0); ptr++) {
  84
  85                 // A semicolon means we've hit the end of the key, unless we're inside double quotes
  86                 if ( (double_quotes != 1) && (*ptr == ';')) {
  87                         *ptr = 0;
  88                 }
  89
  90                 // if we find double quotes, we've got a great set of string boundaries
  91                 if (*ptr == '\"') {
  92                         ++double_quotes;
  93                         if (double_quotes == 1) {
  94                                 strcpy(ptr, ptr+1);
  95                         }
  96                         else {
  97                                 *ptr = 0;
  98                         }
  99                 }
 100         }
 101         *ptr = '\0';
 102         return ptr - target;
 103 }
 104
 105
 106 // For non-multipart messages, we need to generate a quickie partnum of "1"
 107 // to return to callback functions.  Some callbacks demand it.
 108 char *fixed_partnum(char *supplied_partnum) {
 109         if (supplied_partnum == NULL) return "1";
 110         if (strlen(supplied_partnum)==0) return "1";
 111         return supplied_partnum;
 112 }
 113
 114
 115 static inline unsigned int _decode_hex(const char *Source) {
 116         unsigned int ret = '?';
 117         unsigned char LO_NIBBLE;
 118         unsigned char HI_NIBBLE;
 119
 120         HI_NIBBLE = FromHexTable[(unsigned char) *Source];
 121         LO_NIBBLE = FromHexTable[(unsigned char) *(Source+1)];
 122
 123         if ((LO_NIBBLE == 0xFF) || (LO_NIBBLE == 0xFF))
 124                 return ret;
 125         ret = HI_NIBBLE;
 126         ret = ret << 4;
 127         ret = ret | LO_NIBBLE;
 128         return ret;
 129 }
 130
 131 unsigned int decode_hex(char *Source) {return _decode_hex(Source);}
 132
 133
 134 // Convert "quoted-printable" to binary.  Returns number of bytes decoded.
 135 // according to RFC2045 section 6.7
 136 int CtdlDecodeQuotedPrintable(char *decoded, char *encoded, int sourcelen) {
 137         unsigned int ch;
 138         int decoded_length = 0;
 139         int pos = 0;
 140
 141         while (pos < sourcelen) {
 142                 if (*(encoded + pos) == '=') {
 143                         pos ++;
 144                         if (*(encoded + pos) == '\n') {
 145                                 pos ++;
 146                         }
 147                         else if (*(encoded + pos) == '\r') {
 148                                 pos ++;
 149                                 if (*(encoded + pos) == '\n')
 150                                         pos++;
 151                         }
 152                         else {
 153                                 ch = _decode_hex(&encoded[pos]);
 154                                 pos += 2;
 155                                 decoded[decoded_length++] = ch;
 156                         }
 157                 }
 158                 else {
 159                         decoded[decoded_length++] = encoded[pos];
 160                         pos += 1;
 161                 }
 162         }
 163         decoded[decoded_length] = 0;
 164         return(decoded_length);
 165 }
 166
 167
 168 // Given a message or message-part body and a length, handle any necessary
 169 // decoding and pass the request up the stack.
 170 void mime_decode(char *partnum,
 171                  char *part_start, size_t length,
 172                  char *content_type, char *charset, char *encoding,
 173                  char *disposition,
 174                  char *id,
 175                  char *name, char *filename,
 176                  MimeParserCallBackType CallBack,
 177                  MimeParserCallBackType PreMultiPartCallBack,
 178                  MimeParserCallBackType PostMultiPartCallBack,
 179                  void *userdata,
 180                  int dont_decode
 181 ) {
 182         char *decoded;
 183         size_t bytes_decoded = 0;
 184
 185         // Some encodings aren't really encodings
 186         if (!strcasecmp(encoding, "7bit"))
 187                 *encoding = '\0';
 188         if (!strcasecmp(encoding, "8bit"))
 189                 *encoding = '\0';
 190         if (!strcasecmp(encoding, "binary"))
 191                 *encoding = '\0';
 192         if (!strcasecmp(encoding, "ISO-8859-1"))
 193                 *encoding = '\0';
 194
 195         // If this part is not encoded, send as-is
 196         if ( (strlen(encoding) == 0) || (dont_decode)) {
 197                 if (CallBack != NULL) {
 198                         CallBack(name,
 199                                  filename,
 200                                  fixed_partnum(partnum),
 201                                  disposition,
 202                                  part_start,
 203                                  content_type,
 204                                  charset,
 205                                  length,
 206                                  encoding,
 207                                  id,
 208                                  userdata);
 209                         }
 210                 return;
 211         }
 212
 213         // Fail silently if we hit an unknown encoding.
 214         if ((strcasecmp(encoding, "base64")) && (strcasecmp(encoding, "quoted-printable"))) {
 215                 return;
 216         }
 217
 218         // Allocate a buffer for the decoded data.  The output buffer is slightly
 219         // larger than the input buffer; this assumes that the decoded data
 220         // will never be significantly larger than the encoded data.  This is a
 221         // safe assumption with base64, uuencode, and quoted-printable.
 222         decoded = malloc(length + 32768);
 223         if (decoded == NULL) {
 224                 return;
 225         }
 226
 227         if (!strcasecmp(encoding, "base64")) {
 228                 bytes_decoded = CtdlDecodeBase64(decoded, part_start, length);
 229         }
 230         else if (!strcasecmp(encoding, "quoted-printable")) {
 231                 bytes_decoded = CtdlDecodeQuotedPrintable(decoded, part_start, length);
 232         }
 233
 234         if (bytes_decoded > 0) if (CallBack != NULL) {
 235                         char encoding_buf[SIZ];
 236
 237                         strcpy(encoding_buf, "binary");
 238                         CallBack(name,
 239                                  filename,
 240                                  fixed_partnum(partnum),
 241                                  disposition,
 242                                  decoded,
 243                                  content_type,
 244                                  charset,
 245                                  bytes_decoded,
 246                                  encoding_buf,
 247                                  id,
 248                                  userdata);
 249         }
 250
 251         free(decoded);
 252 }
 253
 254 // this is the extract of mime_decode which can be called if 'dont_decode' was set;
 255 // to save the cpu intense process of decoding to the time when it realy wants the content.
 256 // returns:
 257 //   - > 0 we decoded something, its on *decoded, you need to free it.
 258 //   - = 0 no need to decode stuff. *decoded will be NULL.
 259 //   - < 0 an error occured, either an unknown encoding, or alloc failed. no need to free.
 260 int mime_decode_now (char *part_start,
 261                      size_t length,
 262                      char *encoding,
 263                      char **decoded,
 264                      size_t *bytes_decoded)
 265 {
 266         *bytes_decoded = 0;
 267         *decoded = NULL;
 268         // Some encodings aren't really encodings
 269         if (!strcasecmp(encoding, "7bit"))
 270                 *encoding = '\0';
 271         if (!strcasecmp(encoding, "8bit"))
 272                 *encoding = '\0';
 273         if (!strcasecmp(encoding, "binary"))
 274                 *encoding = '\0';
 275
 276         // If this part is not encoded, send as-is
 277         if (strlen(encoding) == 0) {
 278                 return 0;
 279         }
 280
 281
 282         // Fail if we hit an unknown encoding.
 283         if ((strcasecmp(encoding, "base64"))
 284             && (strcasecmp(encoding, "quoted-printable"))) {
 285                 return -1;
 286         }
 287
 288         // Allocate a buffer for the decoded data.  The output buffer is slightly
 289         // larger than the input buffer; this assumes that the decoded data
 290         // will never be significantly larger than the encoded data.  This is a
 291         // safe assumption with base64, uuencode, and quoted-printable.
 292         *decoded = malloc(length + 32768);
 293         if (decoded == NULL) {
 294                 return -1;
 295         }
 296
 297         if (!strcasecmp(encoding, "base64")) {
 298                 *bytes_decoded = CtdlDecodeBase64(*decoded, part_start, length);
 299                 return 1;
 300         }
 301         else if (!strcasecmp(encoding, "quoted-printable")) {
 302                 *bytes_decoded = CtdlDecodeQuotedPrintable(*decoded, part_start, length);
 303                 return 1;
 304         }
 305         return -1;
 306 }
 307
 308 typedef enum _eIntMimeHdrs {
 309         boundary,
 310         startary,
 311         endary,
 312         content_type,
 313         charset,
 314         encoding,
 315         content_type_name,
 316         content_disposition_name,
 317         filename,
 318         disposition,
 319         id,
 320         eMax /* don't move ! */
 321 } eIntMimeHdrs;
 322
 323 typedef struct _CBufStr {
 324         char Key[SIZ];
 325         long len;
 326 }CBufStr;
 327
 328 typedef struct _interesting_mime_headers {
 329         CBufStr b[eMax];
 330         long content_length;
 331         long is_multipart;
 332 } interesting_mime_headers;
 333
 334
 335 static void FlushInterestingMimes(interesting_mime_headers *m)
 336 {
 337         int i;
 338
 339         for (i = 0; i < eMax; i++) {
 340              m->b[i].Key[0] = '\0';
 341              m->b[i].len = 0;
 342         }
 343         m->content_length = -1;
 344 }
 345 static interesting_mime_headers *InitInterestingMimes(void)
 346 {
 347         interesting_mime_headers *m;
 348         m = (interesting_mime_headers*) malloc( sizeof(interesting_mime_headers));
 349
 350         FlushInterestingMimes(m);
 351
 352         return m;
 353 }
 354
 355
 356 static long parse_MimeHeaders(interesting_mime_headers *m,
 357                               char** pcontent_start,
 358                               char *content_end)
 359 {
 360         char buf[SIZ];
 361         char header[SIZ];
 362         long headerlen;
 363         char *ptr, *pch;
 364         int buflen = 0;
 365         int i;
 366
 367         // Learn interesting things from the headers
 368         ptr = *pcontent_start;
 369         *header = '\0';
 370         headerlen = 0;
 371         do {
 372                 ptr = memreadlinelen(ptr, buf, SIZ, &buflen);
 373
 374                 for (i = 0; i < buflen; ++i) {
 375                         if (isspace(buf[i])) {
 376                                 buf[i] = ' ';
 377                         }
 378                 }
 379
 380                 if (!isspace(buf[0]) && (headerlen > 0)) {
 381                         if (!strncasecmp(header, "Content-type:", 13)) {
 382                                 memcpy (m->b[content_type].Key, &header[13], headerlen - 12);
 383                                 m->b[content_type].Key[headerlen - 12] = '\0';
 384                                 m->b[content_type].len = string_trim (m->b[content_type].Key);
 385
 386                                 m->b[content_type_name].len = extract_key(m->b[content_type_name].Key, CKEY(m->b[content_type]), HKEY("name"), '=');
 387                                 m->b[charset].len           = extract_key(m->b[charset].Key,           CKEY(m->b[content_type]), HKEY("charset"), '=');
 388                                 m->b[boundary].len          = extract_key(m->b[boundary].Key,          header,       headerlen,  HKEY("boundary"), '=');
 389
 390                                 /* Deal with weird headers */
 391                                 pch = strchr(m->b[content_type].Key, ' ');
 392                                 if (pch != NULL) {
 393                                         *pch = '\0';
 394                                         m->b[content_type].len = m->b[content_type].Key - pch;
 395                                 }
 396                                 pch = strchr(m->b[content_type].Key, ';');
 397                                 if (pch != NULL) {
 398                                         *pch = '\0';
 399                                         m->b[content_type].len = m->b[content_type].Key - pch;
 400                                 }
 401                         }
 402                         else if (!strncasecmp(header, "Content-Disposition:", 20)) {
 403                                 memcpy (m->b[disposition].Key, &header[20], headerlen - 19);
 404                                 m->b[disposition].Key[headerlen - 19] = '\0';
 405                                 m->b[disposition].len = string_trim(m->b[disposition].Key);
 406
 407                                 m->b[content_disposition_name].len = extract_key(m->b[content_disposition_name].Key, CKEY(m->b[disposition]), HKEY("name"), '=');
 408                                 m->b[filename].len                 = extract_key(m->b[filename].Key,                 CKEY(m->b[disposition]), HKEY("filename"), '=');
 409                                 pch = strchr(m->b[disposition].Key, ';');
 410                                 if (pch != NULL) *pch = '\0';
 411                                 m->b[disposition].len = string_trim(m->b[disposition].Key);
 412                         }
 413                         else if (!strncasecmp(header, "Content-ID:", 11)) {
 414                                 memcpy(m->b[id].Key, &header[11], headerlen - 11);
 415                                 m->b[id].Key[headerlen - 11] = '\0';
 416                                 string_trim(m->b[id].Key);
 417                                 m->b[id].len = stripallbut(m->b[id].Key, '<', '>');
 418                         }
 419                         else if (!strncasecmp(header, "Content-length: ", 15)) {
 420                                 char *clbuf;
 421                                 clbuf = &header[15];
 422                                 while (isspace(*clbuf))
 423                                         clbuf ++;
 424                                 m->content_length = (size_t) atol(clbuf);
 425                         }
 426                         else if (!strncasecmp(header, "Content-transfer-encoding: ", 26)) {
 427                                 memcpy(m->b[encoding].Key, &header[26], headerlen - 26);
 428                                 m->b[encoding].Key[headerlen - 26] = '\0';
 429                                 m->b[encoding].len = string_trim(m->b[encoding].Key);
 430                         }
 431                         *header = '\0';
 432                         headerlen = 0;
 433                 }
 434                 if ((headerlen + buflen + 2) < SIZ) {
 435                         memcpy(&header[headerlen], buf, buflen);
 436                         headerlen += buflen;
 437                         header[headerlen] = '\0';
 438                 }
 439                 if (ptr >= content_end) {
 440                         return -1;
 441                 }
 442         } while ((!IsEmptyStr(buf)) && (*ptr != 0));
 443
 444         m->is_multipart = m->b[boundary].len != 0;
 445         *pcontent_start = ptr;
 446
 447         return 0;
 448 }
 449
 450
 451 static int IsAsciiEncoding(interesting_mime_headers *m) {
 452
 453         if ((m->b[encoding].len != 0) &&
 454             (strcasecmp(m->b[encoding].Key, "base64") == 0))
 455                 return 1;
 456         if ((m->b[encoding].len != 0) &&
 457             (strcmp(m->b[encoding].Key, "quoted-printable") == 0))
 458                 return 1;
 459
 460         return 0;
 461 }
 462
 463 static char *FindNextContent(char *ptr,
 464                              char *content_end,
 465                              interesting_mime_headers *SubMimeHeaders,
 466                              interesting_mime_headers *m)
 467 {
 468         char *next_boundary;
 469         char  tmp;
 470
 471         if (IsAsciiEncoding(SubMimeHeaders)) {
 472                 tmp = *content_end;
 473                 *content_end = '\0';
 474
 475                 // ok, if we have a content length of the mime part,
 476                 // try skipping the content on the search for the next
 477                 // boundary. since we don't trust the content_length
 478                 // to be all accurate, and suspect it to lose one digit
 479                 // per line with a line length of 80 chars, we need
 480                 // to start searching a little before..
 481
 482                 if ((SubMimeHeaders->content_length != -1) && (SubMimeHeaders->content_length > 10)) {
 483                         char *pptr;
 484                         long lines;
 485
 486                         lines = SubMimeHeaders->content_length / 80;
 487                         pptr = ptr + SubMimeHeaders->content_length - lines - 10;
 488                         if (pptr < content_end)
 489                                 ptr = pptr;
 490                 }
 491
 492                 next_boundary = strstr(ptr, m->b[startary].Key);
 493                 *content_end = tmp;
 494         }
 495         else {
 496                 char *srch;
 497                 // ok, if we have a content length of the mime part,
 498                 // try skipping the content on the search for the next
 499                 // boundary. since we don't trust the content_length
 500                 // to be all accurate, start searching a little before..
 501
 502                 if ((SubMimeHeaders->content_length != -1) && (SubMimeHeaders->content_length > 10)) {
 503                         char *pptr;
 504                         pptr = ptr + SubMimeHeaders->content_length - 10;
 505                         if (pptr < content_end)
 506                                 ptr = pptr;
 507                 }
 508
 509                 srch = next_boundary = NULL;
 510                 for (srch = memchr(ptr, '-',  content_end - ptr);
 511                      (srch != NULL) && (srch < content_end);
 512                      srch = memchr(srch, '-',  content_end - srch))
 513                 {
 514                         if (!memcmp(srch, m->b[startary].Key, m->b[startary].len)) {
 515                                 next_boundary = srch;
 516                                 srch = content_end;
 517                         }
 518                         else srch ++;
 519
 520                 }
 521
 522         }
 523         return next_boundary;
 524 }
 525
 526
 527 // Break out the components of a multipart message
 528 // (This function expects to be fed HEADERS + CONTENT)
 529 // Note: NULL can be supplied as content_end; in this case, the message is
 530 // considered to have ended when the parser encounters a 0x00 byte.
 531 static void recurseable_mime_parser(char *partnum,
 532                                     char *content_start, char *content_end,
 533                                     MimeParserCallBackType CallBack,
 534                                     MimeParserCallBackType PreMultiPartCallBack,
 535                                     MimeParserCallBackType PostMultiPartCallBack,
 536                                     void *userdata,
 537                                     int dont_decode,
 538                                     interesting_mime_headers *m)
 539 {
 540         interesting_mime_headers *SubMimeHeaders;
 541         char     *ptr;
 542         char     *part_start;
 543         char     *part_end = NULL;
 544         char     *evaluate_crlf_ptr = NULL;
 545         char     *next_boundary;
 546         char      nested_partnum[256];
 547         int       crlf_in_use = 0;
 548         int       part_seq = 0;
 549         CBufStr  *chosen_name;
 550
 551
 552         // If this is a multipart message, then recursively process it
 553         ptr = content_start;
 554         part_start = NULL;
 555         if (m->is_multipart) {
 556
 557                 // Tell the client about this message's multipartedness
 558                 if (PreMultiPartCallBack != NULL) {
 559                         PreMultiPartCallBack("",
 560                                              "",
 561                                              partnum,
 562                                              "",
 563                                              NULL,
 564                                              m->b[content_type].Key,
 565                                              m->b[charset].Key,
 566                                              0,
 567                                              m->b[encoding].Key,
 568                                              m->b[id].Key,
 569                                              userdata);
 570                 }
 571
 572                 // Figure out where the boundaries are
 573                 m->b[startary].len = snprintf(m->b[startary].Key, SIZ, "--%s", m->b[boundary].Key);
 574                 SubMimeHeaders = InitInterestingMimes ();
 575
 576                 while ((*ptr == '\r') || (*ptr == '\n')) {
 577                         ptr++;
 578                 }
 579
 580                 if (strncmp(ptr, m->b[startary].Key, m->b[startary].len) == 0) {
 581                         ptr += m->b[startary].len;
 582                 }
 583
 584                 while ((*ptr == '\r') || (*ptr == '\n')) {
 585                         ptr ++;
 586                 }
 587
 588                 part_start = NULL;
 589                 do {
 590                         char *optr;
 591
 592                         optr = ptr;
 593                         if (parse_MimeHeaders(SubMimeHeaders, &ptr, content_end) != 0)
 594                                 break;
 595                         if ((ptr - optr > 2) && (*(ptr - 2) == '\r')) {
 596                                 crlf_in_use = 1;
 597                         }
 598
 599                         part_start = ptr;
 600
 601                         next_boundary = FindNextContent(ptr, content_end, SubMimeHeaders, m);
 602                         if ((next_boundary != NULL) && (next_boundary - part_start < 3)) {
 603                                 FlushInterestingMimes(SubMimeHeaders);
 604                                 continue;
 605                         }
 606
 607                         if ( (part_start != NULL) && (next_boundary != NULL) ) {
 608                                 part_end = next_boundary;
 609                                 --part_end;             // omit the trailing LF
 610                                 if (crlf_in_use) {
 611                                         --part_end;     // omit the trailing CR
 612                                 }
 613
 614                                 if (!IsEmptyStr(partnum)) {
 615                                         snprintf(nested_partnum,
 616                                                  sizeof nested_partnum,
 617                                                  "%s.%d", partnum,
 618                                                  ++part_seq);
 619                                 }
 620                                 else {
 621                                         snprintf(nested_partnum,
 622                                                  sizeof nested_partnum,
 623                                                  "%d", ++part_seq);
 624                                 }
 625                                 recurseable_mime_parser(nested_partnum,
 626                                                         part_start,
 627                                                         part_end,
 628                                                         CallBack,
 629                                                         PreMultiPartCallBack,
 630                                                         PostMultiPartCallBack,
 631                                                         userdata,
 632                                                         dont_decode,
 633                                                         SubMimeHeaders);
 634                         }
 635
 636                         if (next_boundary != NULL) {
 637                                 // If we pass out of scope, don't attempt to read past the end boundary.
 638                                 if ((*(next_boundary + m->b[startary].len) == '-') &&
 639                                     (*(next_boundary + m->b[startary].len + 1) == '-') ){
 640                                         ptr = content_end;
 641                                 }
 642                                 else {
 643                                         // Set up for the next part.
 644                                         part_start = strstr(next_boundary, "\n");
 645
 646                                         // Determine whether newlines are LF or CRLF
 647                                         evaluate_crlf_ptr = part_start;
 648                                         --evaluate_crlf_ptr;
 649                                         if ((*evaluate_crlf_ptr == '\r') && (*(evaluate_crlf_ptr + 1) == '\n')) {
 650                                                 crlf_in_use = 1;
 651                                         }
 652                                         else {
 653                                                 crlf_in_use = 0;
 654                                         }
 655
 656                                         // Advance past the LF ... now we're in the next part
 657                                         ++part_start;
 658                                         ptr = part_start;
 659                                 }
 660                         }
 661                         else {
 662                                 // Invalid end of multipart.  Bail out!
 663                                 ptr = content_end;
 664                         }
 665                         FlushInterestingMimes(SubMimeHeaders);
 666                 } while ( (ptr < content_end) && (next_boundary != NULL) );
 667
 668                 free(SubMimeHeaders);
 669
 670                 if (PostMultiPartCallBack != NULL) {
 671                         PostMultiPartCallBack("",
 672                                               "",
 673                                               partnum,
 674                                               "",
 675                                               NULL,
 676                                               m->b[content_type].Key,
 677                                               m->b[charset].Key,
 678                                               0,
 679                                               m->b[encoding].Key,
 680                                               m->b[id].Key,
 681                                               userdata);
 682                 }
 683         }
 684         // If it's not a multipart message, then do something with it
 685         else {
 686                 size_t length;
 687                 part_start = ptr;
 688                 length = content_end - part_start;
 689                 ptr = part_end = content_end;
 690
 691                 /* The following code will truncate the MIME part to the size
 692                  * specified by the Content-length: header.   We have commented it
 693                  * out because these headers have a tendency to be wrong.
 694                  *
 695                  *      if ( (content_length > 0) && (length > content_length) ) {
 696                  *              length = content_length;
 697                  *      }
 698                  */
 699
 700                 /* Sometimes the "name" field is tacked on to Content-type,
 701                  * and sometimes it's tacked on to Content-disposition.  Use
 702                  * whichever one we have.
 703                  */
 704                 if (m->b[content_disposition_name].len > m->b[content_type_name].len) {
 705                         chosen_name = &m->b[content_disposition_name];
 706                 }
 707                 else {
 708                         chosen_name = &m->b[content_type_name];
 709                 }
 710
 711                 // Ok, we've got a non-multipart part here, so do something with it.
 712                 mime_decode(partnum,
 713                             part_start,
 714                             length,
 715                             m->b[content_type].Key,
 716                             m->b[charset].Key,
 717                             m->b[encoding].Key,
 718                             m->b[disposition].Key,
 719                             m->b[id].Key,
 720                             chosen_name->Key,
 721                             m->b[filename].Key,
 722                             CallBack,
 723                             NULL, NULL,
 724                             userdata,
 725                             dont_decode
 726                         );
 727
 728                 /*
 729                  * Now if it's an encapsulated message/rfc822 then we have to recurse into it
 730                  */
 731                 if (!strcasecmp(&m->b[content_type].Key[0], "message/rfc822")) {
 732
 733                         if (PreMultiPartCallBack != NULL) {
 734                                 PreMultiPartCallBack("",
 735                                                      "",
 736                                                      partnum,
 737                                                      "",
 738                                                      NULL,
 739                                                      m->b[content_type].Key,
 740                                                      m->b[charset].Key,
 741                                                      0,
 742                                                      m->b[encoding].Key,
 743                                                      m->b[id].Key,
 744                                                      userdata);
 745                         }
 746                         if (CallBack != NULL) {
 747                                 if (strlen(partnum) > 0) {
 748                                         snprintf(nested_partnum,
 749                                                  sizeof nested_partnum,
 750                                                  "%s.%d", partnum,
 751                                                  ++part_seq);
 752                                 }
 753                                 else {
 754                                         snprintf(nested_partnum,
 755                                                  sizeof nested_partnum,
 756                                                  "%d", ++part_seq);
 757                                 }
 758                                 the_mime_parser(nested_partnum,
 759                                                 part_start,
 760                                                 part_end,
 761                                                 CallBack,
 762                                                 PreMultiPartCallBack,
 763                                                 PostMultiPartCallBack,
 764                                                 userdata,
 765                                                 dont_decode
 766                                         );
 767                         }
 768                         if (PostMultiPartCallBack != NULL) {
 769                                 PostMultiPartCallBack("",
 770                                                       "",
 771                                                       partnum,
 772                                                       "",
 773                                                       NULL,
 774                                                       m->b[content_type].Key,
 775                                                       m->b[charset].Key,
 776                                                       0,
 777                                                       m->b[encoding].Key,
 778                                                       m->b[id].Key,
 779                                                       userdata);
 780                         }
 781
 782
 783                 }
 784
 785         }
 786
 787 }
 788
 789
 790 // Break out the components of a multipart message
 791 // (This function expects to be fed HEADERS + CONTENT)
 792 // Note: NULL can be supplied as content_end; in this case, the message is
 793 // considered to have ended when the parser encounters a 0x00 byte.
 794 void the_mime_parser(char *partnum,
 795                      char *content_start, char *content_end,
 796                      MimeParserCallBackType CallBack,
 797                      MimeParserCallBackType PreMultiPartCallBack,
 798                      MimeParserCallBackType PostMultiPartCallBack,
 799                      void *userdata,
 800                      int dont_decode)
 801 {
 802         interesting_mime_headers *m;
 803
 804         // If the caller didn't supply an endpointer, generate one by measure
 805         if (content_end == NULL) {
 806                 content_end = &content_start[strlen(content_start)];
 807         }
 808
 809         m = InitInterestingMimes();
 810
 811         if (!parse_MimeHeaders(m, &content_start, content_end)) {
 812
 813                 recurseable_mime_parser(partnum,
 814                                         content_start, content_end,
 815                                         CallBack,
 816                                         PreMultiPartCallBack,
 817                                         PostMultiPartCallBack,
 818                                         userdata,
 819                                         dont_decode,
 820                                         m);
 821         }
 822         free(m);
 823 }
 824
 825
 826 // Entry point for the MIME parser.
 827 // (This function expects to be fed HEADERS + CONTENT)
 828 // Note: NULL can be supplied as content_end; in this case, the message is
 829 // considered to have ended when the parser encounters a 0x00 byte.
 830 void mime_parser(char *content_start,
 831                  char *content_end,
 832                  MimeParserCallBackType CallBack,
 833                  MimeParserCallBackType PreMultiPartCallBack,
 834                  MimeParserCallBackType PostMultiPartCallBack,
 835                  void *userdata,
 836                  int dont_decode)
 837 {
 838         the_mime_parser("", content_start, content_end,
 839                         CallBack,
 840                         PreMultiPartCallBack,
 841                         PostMultiPartCallBack,
 842                         userdata, dont_decode);
 843 }
 844
 845
 846 typedef struct _MimeGuess {
 847         const char *Pattern;
 848         size_t PatternLen;
 849         long PatternOffset;
 850         const char *MimeString;
 851 } MimeGuess;
 852
 853 MimeGuess MyMimes [] = {
 854         {
 855                 "GIF",
 856                 3,
 857                 0,
 858                 "image/gif"
 859         },
 860         {
 861                 "\xff\xd8",
 862                 2,
 863                 0,
 864                 "image/jpeg"
 865         },
 866         {
 867                 "\x89PNG",
 868                 4,
 869                 0,
 870                 "image/png"
 871         },
 872         { // last...
 873                 "",
 874                 0,
 875                 0,
 876                 ""
 877         }
 878 };
 879
 880
 881 const char *GuessMimeType(const char *data, size_t dlen) {
 882         int MimeIndex = 0;
 883
 884         while (MyMimes[MimeIndex].PatternLen != 0) {
 885                 if ((MyMimes[MimeIndex].PatternLen +
 886                      MyMimes[MimeIndex].PatternOffset < dlen) &&
 887                     strncmp(MyMimes[MimeIndex].Pattern,
 888                             &data[MyMimes[MimeIndex].PatternOffset],
 889                             MyMimes[MimeIndex].PatternLen) == 0)
 890                 {
 891                         return MyMimes[MimeIndex].MimeString;
 892                 }
 893                 MimeIndex ++;
 894         }
 895         /*
 896          * ok, our simple minded algorythm didn't find anything,
 897          * let the big chegger try it, he wil default to application/octet-stream
 898          */
 899         return (xdg_mime_get_mime_type_for_data(data, dlen));
 900 }
 901
 902
 903 const char* GuessMimeByFilename(const char *what, size_t len) {
 904         // we know some hardcoded on our own, try them...
 905         if ((len > 3) && !strncasecmp(&what[len - 4], ".gif", 4))
 906                 return "image/gif";
 907         else if ((len > 2) && !strncasecmp(&what[len - 3], ".js", 3))
 908                 return  "text/javascript";
 909         else if ((len > 3) && !strncasecmp(&what[len - 4], ".txt", 4))
 910                 return "text/plain";
 911         else if ((len > 3) && !strncasecmp(&what[len - 4], ".css", 4))
 912                 return "text/css";
 913         else if ((len > 3) && !strncasecmp(&what[len - 4], ".htc", 4))
 914                 return "text/x-component";
 915         else if ((len > 3) && !strncasecmp(&what[len - 4], ".jpg", 4))
 916                 return "image/jpeg";
 917         else if ((len > 4) && !strncasecmp(&what[len - 5], ".jpeg", 5))
 918                 return "image/jpeg";
 919         else if ((len > 3) && !strncasecmp(&what[len - 4], ".png", 4))
 920                 return "image/png";
 921         else if ((len > 3) && !strncasecmp(&what[len - 4], ".ico", 4))
 922                 return "image/x-icon";
 923         else if ((len > 3) && !strncasecmp(&what[len - 4], ".vcf", 4))
 924                 return "text/x-vcard";
 925         else if ((len > 4) && !strncasecmp(&what[len - 5], ".html", 5))
 926                 return "text/html";
 927         else if ((len > 3) && !strncasecmp(&what[len - 4], ".htm", 4))
 928                 return "text/html";
 929         else if ((len > 3) && !strncasecmp(&what[len - 4], ".wml", 4))
 930                 return "text/vnd.wap.wml";
 931         else if ((len > 4) && !strncasecmp(&what[len - 5], ".wmls", 5))
 932                 return "text/vnd.wap.wmlscript";
 933         else if ((len > 4) && !strncasecmp(&what[len - 5], ".wmlc", 5))
 934                 return "application/vnd.wap.wmlc";
 935         else if ((len > 5) && !strncasecmp(&what[len - 6], ".wmlsc", 6))
 936                 return "application/vnd.wap.wmlscriptc";
 937         else if ((len > 4) && !strncasecmp(&what[len - 5], ".wbmp", 5))
 938                 return "image/vnd.wap.wbmp";
 939         else
 940                 // and let xdgmime do the fallback.
 941                 return xdg_mime_get_mime_type_from_file_name(what);
 942 }
 943
 944 static HashList *IconHash = NULL;
 945
 946 typedef struct IconName IconName;
 947
 948 struct IconName {
 949         char *FlatName;
 950         char *FileName;
 951 };
 952
 953
 954 static void DeleteIcon(void *IconNamePtr) {
 955         IconName *Icon = (IconName*) IconNamePtr;
 956         free(Icon->FlatName);
 957         free(Icon->FileName);
 958         free(Icon);
 959 }
 960
 961
 962 #define GENSTR "x-generic"
 963 #define IGNORE_PREFIX_1 "gnome-mime"
 964 int LoadIconDir(const char *DirName) {
 965         DIR *filedir = NULL;
 966         struct dirent *filedir_entry;
 967         int d_namelen;
 968         int d_without_ext;
 969         IconName *Icon;
 970
 971         filedir = opendir (DirName);
 972         IconHash = NewHash(1, NULL);
 973         if (filedir == NULL) {
 974                 return 0;
 975         }
 976
 977         while ((filedir_entry = readdir(filedir))) {
 978                 char *MinorPtr;
 979                 char *PStart;
 980 #ifdef _DIRENT_HAVE_D_NAMLEN
 981                 d_namelen = filedir_entry->d_namlen;
 982 #else
 983                 d_namelen = strlen(filedir_entry->d_name);
 984 #endif
 985                 d_without_ext = d_namelen;
 986                 while ((d_without_ext > 0) && (filedir_entry->d_name[d_without_ext] != '.'))
 987                         d_without_ext --;
 988                 if ((d_without_ext == 0) || (d_namelen < 3))
 989                         continue;
 990
 991                 if ((sizeof(IGNORE_PREFIX_1) < d_namelen) &&
 992                     (strncmp(IGNORE_PREFIX_1,
 993                              filedir_entry->d_name,
 994                              sizeof(IGNORE_PREFIX_1) - 1) == 0)) {
 995                         PStart = filedir_entry->d_name + sizeof(IGNORE_PREFIX_1);
 996                         d_without_ext -= sizeof(IGNORE_PREFIX_1);
 997                 }
 998                 else {
 999                         PStart = filedir_entry->d_name;
1000                 }
1001                 Icon = malloc(sizeof(IconName));
1002
1003                 Icon->FileName = malloc(d_namelen + 1);
1004                 memcpy(Icon->FileName, filedir_entry->d_name, d_namelen + 1);
1005
1006                 Icon->FlatName = malloc(d_without_ext + 1);
1007                 memcpy(Icon->FlatName, PStart, d_without_ext);
1008                 Icon->FlatName[d_without_ext] = '\0';
1009                 // Try to find Minor type in image-jpeg
1010                 MinorPtr = strchr(Icon->FlatName, '-');
1011                 if (MinorPtr != NULL) {
1012                         size_t MinorLen;
1013                         MinorLen = 1 + d_without_ext - (MinorPtr - Icon->FlatName + 1);
1014                         if ((MinorLen == sizeof(GENSTR)) &&
1015                             (strncmp(MinorPtr + 1, GENSTR, sizeof(GENSTR)) == 0)) {
1016                                 // ok, we found a generic filename. cut the generic.
1017                                 *MinorPtr = '\0';
1018                                 d_without_ext = d_without_ext - (MinorPtr - Icon->FlatName);
1019                         }
1020                         else { // Map the major / minor separator to /
1021                                 *MinorPtr = '/';
1022                         }
1023                 }
1024
1025                 Put(IconHash, Icon->FlatName, d_without_ext, Icon, DeleteIcon);
1026         }
1027         closedir(filedir);
1028         return 1;
1029 }
1030
1031
1032 const char *GetIconFilename(char *MimeType, size_t len) {
1033         void *vIcon;
1034         IconName *Icon;
1035
1036         if (IconHash == NULL) {
1037                 return NULL;
1038         }
1039
1040         GetHash(IconHash, MimeType, len, &vIcon), Icon = (IconName*) vIcon;
1041         // didn't find the exact mimetype? try major only.
1042         if (Icon == NULL) {
1043                 char * pMinor;
1044                 pMinor = strchr(MimeType, '/');
1045                 if (pMinor != NULL) {
1046                         *pMinor = '\0';
1047                         GetHash(IconHash, MimeType, pMinor - MimeType, &vIcon),
1048                                 Icon = (IconName*) vIcon;
1049                 }
1050         }
1051         if (Icon == NULL) {
1052                 return NULL;
1053         }
1054
1055         return Icon->FileName;
1056 }
1057
1058
// Shut down the MIME icon support: hands the icon table (IconHash, created
// by LoadIconDir()) to DeleteHash().
void ShutDownLibCitadelMime(void) {
        DeleteHash(&IconHash);
}