webcit/mime_parser.c

   1 /*
   2  * $Id$
   3  */
   4 /**
   5  * \defgroup MIME This is the MIME parser for Citadel.
   6  *
   7  * Copyright (c) 1998-2005 by Art Cancro
   8  * This code is distributed under the terms of the GNU General Public License.
   9  * \ingroup WebcitHttpServer
  10  */
  11 /*@{*/
  12 #include "webcit.h"
  13 #include "webserver.h"
  14 #include "mime_parser.h"
  15
  16
  17
  18 void extract_key(char *target, char *source, char *key)
  19 {
  20         int a, b;
  21
  22         strcpy(target, source);
  23         for (a = 0; a < strlen(target); ++a) {
  24                 if ((!strncasecmp(&target[a], key, strlen(key)))
  25                     && (target[a + strlen(key)] == '=')) {
  26                         strcpy(target, &target[a + strlen(key) + 1]);
  27                         if (target[0] == 34)
  28                                 strcpy(target, &target[1]);
  29                         for (b = 0; b < strlen(target); ++b)
  30                                 if (target[b] == 34)
  31                                         target[b] = 0;
  32                         return;
  33                 }
  34         }
  35         strcpy(target, "");
  36 }
  37
  38
  39 /*
  40  * For non-multipart messages, we need to generate a quickie partnum of "1"
  41  * to return to callback functions.  Some callbacks demand it.
  42  */
  43 char *fixed_partnum(char *supplied_partnum) {
  44         if (supplied_partnum == NULL) return "1";
  45         if (strlen(supplied_partnum)==0) return "1";
  46         return supplied_partnum;
  47 }
  48
  49
  50
  51 /*
  52  * Convert "quoted-printable" to binary.  Returns number of bytes decoded.
  53  * according to RFC2045 section 6.7
  54  */
  55 int CtdlDecodeQuotedPrintable(char *decoded, char *encoded, int sourcelen) {
  56         unsigned int ch;
  57         int decoded_length = 0;
  58         int pos = 0;
  59
  60         while (pos < sourcelen)
  61         {
  62                 if (!strncmp(&encoded[pos], "=\r\n", 3))
  63                 {
  64                         pos += 3;
  65                 }
  66                 else if (!strncmp(&encoded[pos], "=\n", 2))
  67                 {
  68                         pos += 2;
  69                 }
  70                 else if (encoded[pos] == '=')
  71                 {
  72                         ch = 0;
  73                         sscanf(&encoded[pos+1], "%02x", &ch);
  74                         pos += 3;
  75                         decoded[decoded_length++] = ch;
  76                 }
  77                 else
  78                 {
  79                         decoded[decoded_length++] = encoded[pos];
  80                         pos += 1;
  81                 }
  82         }
  83         decoded[decoded_length] = 0;
  84         return(decoded_length);
  85 }
  86
  87
  88 /*
  89  * Given a message or message-part body and a length, handle any necessary
  90  * decoding and pass the request up the stack.
  91  */
  92 void mime_decode(char *partnum,
  93                  char *part_start, size_t length,
  94                  char *content_type, char *charset, char *encoding,
  95                  char *disposition,
  96                  char *name, char *filename,
  97                  void (*CallBack)
  98                   (char *cbname,
  99                    char *cbfilename,
 100                    char *cbpartnum,
 101                    char *cbdisp,
 102                    void *cbcontent,
 103                    char *cbtype,
 104                    char *cbcharset,
 105                    size_t cblength,
 106                    char *cbencoding,
 107                    void *cbuserdata),
 108                  void (*PreMultiPartCallBack)
 109                   (char *cbname,
 110                    char *cbfilename,
 111                    char *cbpartnum,
 112                    char *cbdisp,
 113                    void *cbcontent,
 114                    char *cbtype,
 115                    char *cbcharset,
 116                    size_t cblength,
 117                    char *cbencoding,
 118                    void *cbuserdata),
 119                  void (*PostMultiPartCallBack)
 120                   (char *cbname,
 121                    char *cbfilename,
 122                    char *cbpartnum,
 123                    char *cbdisp,
 124                    void *cbcontent,
 125                    char *cbtype,
 126                    char *cbcharset,
 127                    size_t cblength,
 128                    char *cbencoding,
 129                    void *cbuserdata),
 130                   void *userdata,
 131                   int dont_decode
 132 )
 133 {
 134
 135         char *decoded;
 136         size_t bytes_decoded = 0;
 137
 138         /* Some encodings aren't really encodings */
 139         if (!strcasecmp(encoding, "7bit"))
 140                 strcpy(encoding, "");
 141         if (!strcasecmp(encoding, "8bit"))
 142                 strcpy(encoding, "");
 143         if (!strcasecmp(encoding, "binary"))
 144                 strcpy(encoding, "");
 145
 146         /* If this part is not encoded, send as-is */
 147         if ( (strlen(encoding) == 0) || (dont_decode)) {
 148                 if (CallBack != NULL) {
 149                         CallBack(name, filename, fixed_partnum(partnum),
 150                                 disposition, part_start,
 151                                 content_type, charset, length, encoding, userdata);
 152                         }
 153                 return;
 154         }
 155
 156         /* Fail silently if we hit an unknown encoding. */
 157         if ((strcasecmp(encoding, "base64"))
 158             && (strcasecmp(encoding, "quoted-printable"))) {
 159                 return;
 160         }
 161
 162         /*
 163          * Allocate a buffer for the decoded data.  The output buffer is slightly
 164          * larger than the input buffer; this assumes that the decoded data
 165          * will never be significantly larger than the encoded data.  This is a
 166          * safe assumption with base64, uuencode, and quoted-printable.
 167          */
 168         decoded = malloc(length + 32768);
 169         if (decoded == NULL) {
 170                 return;
 171         }
 172
 173         if (!strcasecmp(encoding, "base64")) {
 174                 bytes_decoded = CtdlDecodeBase64(decoded, part_start, length);
 175         }
 176         else if (!strcasecmp(encoding, "quoted-printable")) {
 177                 bytes_decoded = CtdlDecodeQuotedPrintable(decoded, part_start, length);
 178         }
 179
 180         if (bytes_decoded > 0) if (CallBack != NULL) {
 181                 CallBack(name, filename, fixed_partnum(partnum),
 182                         disposition, decoded,
 183                         content_type, charset, bytes_decoded, "binary", userdata);
 184         }
 185
 186         free(decoded);
 187 }
 188
 189 /*
 190  * Break out the components of a multipart message
 191  * (This function expects to be fed HEADERS + CONTENT)
 192  * Note: NULL can be supplied as content_end; in this case, the message is
 193  * considered to have ended when the parser encounters a 0x00 byte.
 194  */
 195 void the_mime_parser(char *partnum,
 196                      char *content_start, char *content_end,
 197                      void (*CallBack)
 198                       (char *cbname,
 199                        char *cbfilename,
 200                        char *cbpartnum,
 201                        char *cbdisp,
 202                        void *cbcontent,
 203                        char *cbtype,
 204                        char *cbcharset,
 205                        size_t cblength,
 206                        char *cbencoding,
 207                        void *cbuserdata),
 208                      void (*PreMultiPartCallBack)
 209                       (char *cbname,
 210                        char *cbfilename,
 211                        char *cbpartnum,
 212                        char *cbdisp,
 213                        void *cbcontent,
 214                        char *cbtype,
 215                        char *cbcharset,
 216                        size_t cblength,
 217                        char *cbencoding,
 218                        void *cbuserdata),
 219                      void (*PostMultiPartCallBack)
 220                       (char *cbname,
 221                        char *cbfilename,
 222                        char *cbpartnum,
 223                        char *cbdisp,
 224                        void *cbcontent,
 225                        char *cbtype,
 226                        char *cbcharset,
 227                        size_t cblength,
 228                        char *cbencoding,
 229                        void *cbuserdata),
 230                       void *userdata,
 231                       int dont_decode
 232 )
 233 {
 234
 235         char *ptr;
 236         char *srch = NULL;
 237         char *part_start, *part_end = NULL;
 238         char buf[SIZ];
 239         char *header;
 240         char *boundary;
 241         char *startary;
 242         size_t startary_len = 0;
 243         char *endary;
 244         char *next_boundary;
 245         char *content_type;
 246         char *charset;
 247         size_t content_length;
 248         char *encoding;
 249         char *disposition;
 250         char *name = NULL;
 251         char *content_type_name;
 252         char *content_disposition_name;
 253         char *filename;
 254         int is_multipart;
 255         int part_seq = 0;
 256         int i;
 257         size_t length;
 258         char nested_partnum[SIZ];
 259
 260         ptr = content_start;
 261         content_length = 0;
 262
 263         boundary = malloc(SIZ);
 264         memset(boundary, 0, SIZ);
 265
 266         startary = malloc(SIZ);
 267         memset(startary, 0, SIZ);
 268
 269         endary = malloc(SIZ);
 270         memset(endary, 0, SIZ);
 271
 272         header = malloc(SIZ);
 273         memset(header, 0, SIZ);
 274
 275         content_type = malloc(SIZ);
 276         memset(content_type, 0, SIZ);
 277
 278         charset = malloc(SIZ);
 279         memset(charset, 0, SIZ);
 280
 281         encoding = malloc(SIZ);
 282         memset(encoding, 0, SIZ);
 283
 284         content_type_name = malloc(SIZ);
 285         memset(content_type_name, 0, SIZ);
 286
 287         content_disposition_name = malloc(SIZ);
 288         memset(content_disposition_name, 0, SIZ);
 289
 290         filename = malloc(SIZ);
 291         memset(filename, 0, SIZ);
 292
 293         disposition = malloc(SIZ);
 294         memset(disposition, 0, SIZ);
 295
 296         /* If the caller didn't supply an endpointer, generate one by measure */
 297         if (content_end == NULL) {
 298                 content_end = &content_start[strlen(content_start)];
 299         }
 300
 301         /* Learn interesting things from the headers */
 302         strcpy(header, "");
 303         do {
 304                 ptr = memreadline(ptr, buf, SIZ);
 305                 if (ptr >= content_end) {
 306                         goto end_parser;
 307                 }
 308
 309                 for (i = 0; i < strlen(buf); ++i) {
 310                         if (isspace(buf[i])) {
 311                                 buf[i] = ' ';
 312                         }
 313                 }
 314
 315                 if (!isspace(buf[0])) {
 316                         if (!strncasecmp(header, "Content-type: ", 14)) {
 317                                 strcpy(content_type, &header[14]);
 318                                 extract_key(content_type_name, content_type, "name");
 319                                 extract_key(charset, content_type, "charset");
 320                                 /* Deal with weird headers */
 321                                 if (strchr(content_type, ' '))
 322                                         *(strchr(content_type, ' ')) = '\0';
 323                                 if (strchr(content_type, ';'))
 324                                         *(strchr(content_type, ';')) = '\0';
 325                         }
 326                         if (!strncasecmp(header, "Content-Disposition: ", 21)) {
 327                                 strcpy(disposition, &header[21]);
 328                                 extract_key(content_disposition_name, disposition, "name");
 329                                 extract_key(filename, disposition, "filename");
 330                         }
 331                         if (!strncasecmp(header, "Content-length: ", 16)) {
 332                                 content_length = (size_t) atol(&header[16]);
 333                         }
 334                         if (!strncasecmp(header,
 335                                       "Content-transfer-encoding: ", 27))
 336                                 strcpy(encoding, &header[27]);
 337                         if (strlen(boundary) == 0)
 338                                 extract_key(boundary, header, "boundary");
 339                         strcpy(header, "");
 340                 }
 341                 if ((strlen(header) + strlen(buf) + 2) < SIZ)
 342                         strcat(header, buf);
 343         } while ((strlen(buf) > 0) && (*ptr != 0));
 344
 345         if (strchr(disposition, ';'))
 346                 *(strchr(disposition, ';')) = '\0';
 347         striplt(disposition);
 348         if (strchr(content_type, ';'))
 349                 *(strchr(content_type, ';')) = '\0';
 350         striplt(content_type);
 351
 352         if (strlen(boundary) > 0) {
 353                 is_multipart = 1;
 354         } else {
 355                 is_multipart = 0;
 356         }
 357
 358         /* If this is a multipart message, then recursively process it */
 359         part_start = NULL;
 360         if (is_multipart) {
 361
 362                 /* Tell the client about this message's multipartedness */
 363                 if (PreMultiPartCallBack != NULL) {
 364                         PreMultiPartCallBack("", "", partnum, "",
 365                                 NULL, content_type, charset,
 366                                 0, encoding, userdata);
 367                 }
 368
 369                 /* Figure out where the boundaries are */
 370                 snprintf(startary, SIZ, "--%s", boundary);
 371                 snprintf(endary, SIZ, "--%s--", boundary);
 372                 startary_len = strlen(startary);
 373
 374                 part_start = NULL;
 375                 do {
 376                         next_boundary = NULL;
 377                         for (srch=ptr; srch<content_end; ++srch) {
 378                                 if (!memcmp(srch, startary, startary_len)) {
 379                                         next_boundary = srch;
 380                                         srch = content_end;
 381                                 }
 382                         }
 383
 384                         if ( (part_start != NULL) && (next_boundary != NULL) ) {
 385                                 part_end = next_boundary;
 386                                 --part_end;
 387
 388                                 if (strlen(partnum) > 0) {
 389                                         snprintf(nested_partnum,
 390                                                  sizeof nested_partnum,
 391                                                  "%s.%d", partnum,
 392                                                  ++part_seq);
 393                                 }
 394                                 else {
 395                                         snprintf(nested_partnum,
 396                                                  sizeof nested_partnum,
 397                                                  "%d", ++part_seq);
 398                                 }
 399                                 the_mime_parser(nested_partnum,
 400                                             part_start, part_end,
 401                                                 CallBack,
 402                                                 PreMultiPartCallBack,
 403                                                 PostMultiPartCallBack,
 404                                                 userdata,
 405                                                 dont_decode);
 406                         }
 407
 408                         if (next_boundary != NULL) {
 409                                 /* If we pass out of scope, don't attempt to
 410                                  * read past the end boundary. */
 411                                 if (!strcmp(next_boundary, endary)) {
 412                                         ptr = content_end;
 413                                 }
 414                                 else {
 415                                         /* Set up for the next part. */
 416                                         part_start = strstr(next_boundary, "\n");
 417                                         ++part_start;
 418                                         ptr = part_start;
 419                                 }
 420                         }
 421                         else {
 422                                 /* Invalid end of multipart.  Bail out! */
 423                                 ptr = content_end;
 424                         }
 425                 } while ( (ptr < content_end) && (next_boundary != NULL) );
 426
 427                 if (PostMultiPartCallBack != NULL) {
 428                         PostMultiPartCallBack("", "", partnum, "", NULL,
 429                                 content_type, charset, 0, encoding, userdata);
 430                 }
 431                 goto end_parser;
 432         }
 433
 434         /* If it's not a multipart message, then do something with it */
 435         if (!is_multipart) {
 436                 part_start = ptr;
 437                 length = 0;
 438                 while (ptr < content_end) {
 439                         ++ptr;
 440                         ++length;
 441                 }
 442                 part_end = content_end;
 443
 444                 /******
 445                  * I thought there was an off-by-one error here, but there isn't.
 446                  * This probably means that there's an off-by-one error somewhere
 447                  * else ... or maybe only in certain messages?
 448                 --part_end;
 449                 --length;
 450                 ******/
 451
 452                 /* Truncate if the header told us to */
 453                 if ( (content_length > 0) && (length > content_length) ) {
 454                         length = content_length;
 455                 }
 456
 457                 /* Sometimes the "name" field is tacked on to Content-type,
 458                  * and sometimes it's tacked on to Content-disposition.  Use
 459                  * whichever one we have.
 460                  */
 461                 if (strlen(content_disposition_name) > strlen(content_type_name)) {
 462                         name = content_disposition_name;
 463                 }
 464                 else {
 465                         name = content_type_name;
 466                 }
 467
 468                 /* lprintf(CTDL_DEBUG, "mime_decode part=%s, len=%d, type=%s, charset=%s, encoding=%s\n",
 469                         partnum, length, content_type, charset, encoding); */
 470
 471                 /* Ok, we've got a non-multipart part here, so do something with it.
 472                  */
 473                 mime_decode(partnum,
 474                         part_start, length,
 475                         content_type, charset, encoding, disposition,
 476                         name, filename,
 477                         CallBack, NULL, NULL,
 478                         userdata, dont_decode
 479                 );
 480
 481                 /*
 482                  * Now if it's an encapsulated message/rfc822 then we have to recurse into it
 483                  */
 484                 if (!strcasecmp(content_type, "message/rfc822")) {
 485
 486                         if (PreMultiPartCallBack != NULL) {
 487                                 PreMultiPartCallBack("", "", partnum, "",
 488                                         NULL, content_type, charset,
 489                                         0, encoding, userdata);
 490                         }
 491                         if (CallBack != NULL) {
 492                                 if (strlen(partnum) > 0) {
 493                                         snprintf(nested_partnum,
 494                                                  sizeof nested_partnum,
 495                                                  "%s.%d", partnum,
 496                                                  ++part_seq);
 497                                 }
 498                                 else {
 499                                         snprintf(nested_partnum,
 500                                                  sizeof nested_partnum,
 501                                                  "%d", ++part_seq);
 502                                 }
 503                                 the_mime_parser(nested_partnum,
 504                                         part_start, part_end,
 505                                         CallBack,
 506                                         PreMultiPartCallBack,
 507                                         PostMultiPartCallBack,
 508                                         userdata,
 509                                         dont_decode
 510                                 );
 511                         }
 512                         if (PostMultiPartCallBack != NULL) {
 513                                 PostMultiPartCallBack("", "", partnum, "", NULL,
 514                                         content_type, charset, 0, encoding, userdata);
 515                         }
 516
 517
 518                 }
 519
 520         }
 521
 522 end_parser:     /* free the buffers!  end the oppression!! */
 523         free(boundary);
 524         free(startary);
 525         free(endary);
 526         free(header);
 527         free(content_type);
 528         free(charset);
 529         free(encoding);
 530         free(content_type_name);
 531         free(content_disposition_name);
 532         free(filename);
 533         free(disposition);
 534 }
 535
 536
 537
 538 /*
 539  * Entry point for the MIME parser.
 540  * (This function expects to be fed HEADERS + CONTENT)
 541  * Note: NULL can be supplied as content_end; in this case, the message is
 542  * considered to have ended when the parser encounters a 0x00 byte.
 543  */
 544 void mime_parser(char *content_start,
 545                 char *content_end,
 546
 547                  void (*CallBack)
 548                   (char *cbname,
 549                    char *cbfilename,
 550                    char *cbpartnum,
 551                    char *cbdisp,
 552                    void *cbcontent,
 553                    char *cbtype,
 554                    char *cbcharset,
 555                    size_t cblength,
 556                    char *cbencoding,
 557                    void *cbuserdata),
 558
 559                  void (*PreMultiPartCallBack)
 560                   (char *cbname,
 561                    char *cbfilename,
 562                    char *cbpartnum,
 563                    char *cbdisp,
 564                    void *cbcontent,
 565                    char *cbtype,
 566                    char *cbcharset,
 567                    size_t cblength,
 568                    char *cbencoding,
 569                    void *cbuserdata),
 570
 571                  void (*PostMultiPartCallBack)
 572                   (char *cbname,
 573                    char *cbfilename,
 574                    char *cbpartnum,
 575                    char *cbdisp,
 576                    void *cbcontent,
 577                    char *cbtype,
 578                    char *cbcharset,
 579                    size_t cblength,
 580                    char *cbencoding,
 581                    void *cbuserdata),
 582
 583                   void *userdata,
 584                   int dont_decode
 585 )
 586 {
 587
 588         the_mime_parser("", content_start, content_end,
 589                         CallBack,
 590                         PreMultiPartCallBack,
 591                         PostMultiPartCallBack,
 592                         userdata, dont_decode);
 593 }
 594
 595
 596
 597 /*@}*/