X-Git-Url: https://code.citadel.org/?a=blobdiff_plain;f=webcit%2Fmime_parser.c;h=02edd1c5893b93c0fa3a2ba97f3dcc9b6eb4ae51;hb=694a3ea878536e2deda1c0168e51837a31b81af7;hp=b8aed433c3df41b730f31954a014161188475c69;hpb=371fe79500d68a15c6cc0559daff11352dca5cfa;p=citadel.git diff --git a/webcit/mime_parser.c b/webcit/mime_parser.c index b8aed433c..02edd1c58 100644 --- a/webcit/mime_parser.c +++ b/webcit/mime_parser.c @@ -1,178 +1,662 @@ /* - * mime_parser.c + * $Id$ + */ +/** + * \defgroup MIME This is the MIME parser for Citadel. * - * This is a really bad attempt at writing a parser to handle multipart - * messages -- in the case of WebCit, a form containing uploaded files. + * Copyright (c) 1998-2005 by Art Cancro + * This code is distributed under the terms of the GNU General Public License. + * \ingroup WebcitHttpServer */ - -#include -#include -#include -#include -#include -#include -#include +/*@{*/ #include "webcit.h" -#include "child.h" +#include "webserver.h" +#include "mime_parser.h" -/* - * The very back end for the component handler - * (This function expects to be fed CONTENT ONLY, no headers) +/** + * \brief get mime key + * \param target where to put the mime buffer at??? + * \param source where to extract the mimetype from + * \param key what??? */ -void do_something_with_it(char *content, int length, char *content_type) { - char filename[256]; - int a; - static char partno = 0; - FILE *fp; - - /* Nested multipart gets recursively fed back into the parser */ - if (!strncasecmp(content_type, "multipart", 9)) { - mime_parser(content, length, content_type); - } - - /* If all else fails, save the component to disk (FIX) */ - else { - sprintf(filename, "content.%04x.%04x.%s", - getpid(), ++partno, content_type); - for (a=0; a 0) { + if (!strcmp(buf, "=")) { + soft_line_break = 1; + strcpy(buf, ""); + } else if ((strlen(buf)>=3) && (buf[0]=='=')) { + sscanf(&buf[1], "%02x", &ch); + decoded[decoded_length++] = ch; + strcpy(buf, &buf[3]); + } else { + decoded[decoded_length++] = buf[0]; + strcpy(buf, &buf[1]); } - ++start; - --part_length; - } while((buf[strlen(buf)-1] != 10) && (part_length>0)); - if (part_length <= 0) return; - buf[strlen(buf)-1] = 0; - if (buf[strlen(buf)-1]==13) { - buf[strlen(buf)-1] = 0; - crlf = 1; - } - if (!strncasecmp(buf, "Content-type: ", 14)) { - strcpy(content_type, &buf[14]); - } - } while (strlen(buf)>0); + } + if (soft_line_break == 0) { + decoded[decoded_length++] = '\r'; + decoded[decoded_length++] = '\n'; + } + buf_length = 0; + /*** end -- process one line ***/ + } + } + + decoded[decoded_length++] = 0; + return(decoded_length); +} + +/** + * \brief fully decode a message + * Given a message or message-part body and a length, handle any necessary + * decoding and pass the request up the stack. + * \param partnum todo ????? + * \param part_start todo + * \param length todo + * \param content_type todo + * \param charset todo + * \param encoding todo + * \param disposition todo + * \param name todo + * \param filename todo + * \param CallBack todo + * \param PreMultiPartCallBack todo + * \param PostMultiPartCallBack todo + * \param userdata todo + * \param dont_decode todo + */ +void mime_decode(char *partnum, + char *part_start, size_t length, + char *content_type, char *charset, char *encoding, + char *disposition, + char *name, char *filename, + void (*CallBack) + (char *cbname, + char *cbfilename, + char *cbpartnum, + char *cbdisp, + void *cbcontent, + char *cbtype, + char *cbcharset, + size_t cblength, + char *cbencoding, + void *cbuserdata), + void (*PreMultiPartCallBack) + (char *cbname, + char *cbfilename, + char *cbpartnum, + char *cbdisp, + void *cbcontent, + char *cbtype, + char *cbcharset, + size_t cblength, + char *cbencoding, + void *cbuserdata), + void (*PostMultiPartCallBack) + (char *cbname, + char *cbfilename, + char *cbpartnum, + char *cbdisp, + void *cbcontent, + char *cbtype, + char *cbcharset, + size_t cblength, + char *cbencoding, + void *cbuserdata), + void *userdata, + int dont_decode +) +{ + + char *decoded; + size_t bytes_decoded = 0; + + /* Some encodings aren't really encodings */ + if (!strcasecmp(encoding, "7bit")) + strcpy(encoding, ""); + if (!strcasecmp(encoding, "8bit")) + strcpy(encoding, ""); + if (!strcasecmp(encoding, "binary")) + strcpy(encoding, ""); + + /* If this part is not encoded, send as-is */ + if ( (strlen(encoding) == 0) || (dont_decode)) { + if (CallBack != NULL) { + CallBack(name, filename, fixed_partnum(partnum), + disposition, part_start, + content_type, charset, length, encoding, userdata); + } + return; + } - if (crlf) actual_length = part_length - 2; - else actual_length = part_length - 1; + if ((strcasecmp(encoding, "base64")) + && (strcasecmp(encoding, "quoted-printable"))) { + return; + } + /** + * Allocate a buffer for the decoded data. The output buffer is the + * same size as the input buffer; this assumes that the decoded data + * will never be larger than the encoded data. This is a safe + * assumption with base64, uuencode, and quoted-printable. + */ + decoded = malloc(length+2048); + if (decoded == NULL) { + return; + } - /* Now that we've got this component isolated, what to do with it? */ - do_something_with_it(start, actual_length, content_type); + if (!strcasecmp(encoding, "base64")) { + bytes_decoded = CtdlDecodeBase64(decoded, part_start, length); + } + else if (!strcasecmp(encoding, "quoted-printable")) { + bytes_decoded = CtdlDecodeQuotedPrintable(decoded, + part_start, length); + } + if (bytes_decoded > 0) if (CallBack != NULL) { + CallBack(name, filename, fixed_partnum(partnum), + disposition, decoded, + content_type, charset, bytes_decoded, "binary", userdata); } - -/* - * Break out the components of a multipart message - * (This function expects to be fed CONTENT ONLY, no headers) + free(decoded); +} + +/** + * \brief Break out the components of a multipart message + * (This function expects to be fed HEADERS + CONTENT) + * Note: NULL can be supplied as content_end; in this case, the message is + * considered to have ended when the parser encounters a 0x00 byte. + * \param partnum todo + * \param content_start todo ????? + * \param content_end todo + * \param CallBack todo + * \param PreMultiPartCallBack + * \param PostMultiPartCallBack + * \param userdata todo + * \param dont_decode todo */ -void mime_parser(char *content, int ContentLength, char *ContentType) { - char boundary[256]; - char endary[256]; - int have_boundary = 0; - int a; +void the_mime_parser(char *partnum, + char *content_start, char *content_end, + void (*CallBack) + (char *cbname, + char *cbfilename, + char *cbpartnum, + char *cbdisp, + void *cbcontent, + char *cbtype, + char *cbcharset, + size_t cblength, + char *cbencoding, + void *cbuserdata), + void (*PreMultiPartCallBack) + (char *cbname, + char *cbfilename, + char *cbpartnum, + char *cbdisp, + void *cbcontent, + char *cbtype, + char *cbcharset, + size_t cblength, + char *cbencoding, + void *cbuserdata), + void (*PostMultiPartCallBack) + (char *cbname, + char *cbfilename, + char *cbpartnum, + char *cbdisp, + void *cbcontent, + char *cbtype, + char *cbcharset, + size_t cblength, + char *cbencoding, + void *cbuserdata), + void *userdata, + int dont_decode +) +{ + char *ptr; - char *beginning; - int bytes_processed = 0; - int part_length; + char *srch = NULL; + char *part_start, *part_end = NULL; + char buf[SIZ]; + char *header; + char *boundary; + char *startary; + size_t startary_len = 0; + char *endary; + char *next_boundary; + char *content_type; + char *charset; + size_t content_length; + char *encoding; + char *disposition; + char *name = NULL; + char *content_type_name; + char *content_disposition_name; + char *filename; + int is_multipart; + int part_seq = 0; + int i; + size_t length; + char nested_partnum[SIZ]; - fprintf(stderr, "MIME: ContentLength: %d, ContentType: %s\n", - ContentLength, ContentType); + ptr = content_start; + content_length = 0; - /* If it's not multipart, don't process it as multipart */ - if (strncasecmp(ContentType, "multipart", 9)) { - do_something_with_it(content, ContentLength, ContentType); - return; + boundary = malloc(SIZ); + memset(boundary, 0, SIZ); + + startary = malloc(SIZ); + memset(startary, 0, SIZ); + + endary = malloc(SIZ); + memset(endary, 0, SIZ); + + header = malloc(SIZ); + memset(header, 0, SIZ); + + content_type = malloc(SIZ); + memset(content_type, 0, SIZ); + + charset = malloc(SIZ); + memset(charset, 0, SIZ); + + encoding = malloc(SIZ); + memset(encoding, 0, SIZ); + + content_type_name = malloc(SIZ); + memset(content_type_name, 0, SIZ); + + content_disposition_name = malloc(SIZ); + memset(content_disposition_name, 0, SIZ); + + filename = malloc(SIZ); + memset(filename, 0, SIZ); + + disposition = malloc(SIZ); + memset(disposition, 0, SIZ); + + /** If the caller didn't supply an endpointer, generate one by measure */ + if (content_end == NULL) { + content_end = &content_start[strlen(content_start)]; + } + + /** Learn interesting things from the headers */ + strcpy(header, ""); + do { + ptr = memreadline(ptr, buf, SIZ); + if (ptr >= content_end) { + goto end_parser; + } + + for (i = 0; i < strlen(buf); ++i) { + if (isspace(buf[i])) { + buf[i] = ' '; + } } - /* Figure out what the boundary is */ - strcpy(boundary, ContentType); - for (a=0; a 0) && (*ptr != 0)); - /* We can't process multipart messages without a boundary. */ - if (have_boundary == 0) return; - strcpy(endary, boundary); - strcat(endary, "--"); + if (strchr(disposition, ';')) + *(strchr(disposition, ';')) = '\0'; + striplt(disposition); + if (strchr(content_type, ';')) + *(strchr(content_type, ';')) = '\0'; + striplt(content_type); - ptr = content; + if (strlen(boundary) > 0) { + is_multipart = 1; + } else { + is_multipart = 0; + } - /* Seek to the beginning of the next boundary */ - while (bytes_processed < ContentLength) { - /* && (strncasecmp(ptr, boundary, strlen(boundary))) ) { */ + /** If this is a multipart message, then recursively process it */ + part_start = NULL; + if (is_multipart) { - if (strncasecmp(ptr, boundary, strlen(boundary))) { - ++ptr; - ++bytes_processed; + /** Tell the client about this message's multipartedness */ + if (PreMultiPartCallBack != NULL) { + PreMultiPartCallBack("", "", partnum, "", + NULL, content_type, charset, + 0, encoding, userdata); + } + + /** Figure out where the boundaries are */ + snprintf(startary, SIZ, "--%s", boundary); + snprintf(endary, SIZ, "--%s--", boundary); + startary_len = strlen(startary); + + part_start = NULL; + do { + next_boundary = NULL; + for (srch=ptr; srch 0) { + snprintf(nested_partnum, + sizeof nested_partnum, + "%s.%d", partnum, + ++part_seq); + } + else { + snprintf(nested_partnum, + sizeof nested_partnum, + "%d", ++part_seq); + } + the_mime_parser(nested_partnum, + part_start, part_end, + CallBack, + PreMultiPartCallBack, + PostMultiPartCallBack, + userdata, + dont_decode); } - /* Seek to the end of the boundary string */ - if (!strncasecmp(ptr, boundary, strlen(boundary))) { - while ( (bytes_processed < ContentLength) - && (strncasecmp(ptr, "\n", 1)) ) { - ++ptr; - ++bytes_processed; + if (next_boundary != NULL) { + /** + * If we pass out of scope, don't attempt to + * read past the end boundary. */ + if (!strcmp(next_boundary, endary)) { + ptr = content_end; } - beginning = ptr; - part_length = 0; - while ( (bytes_processed < ContentLength) - && (strncasecmp(ptr, boundary, strlen(boundary))) ) { - ++ptr; - ++bytes_processed; - ++part_length; + else { + /** Set up for the next part. */ + part_start = strstr(next_boundary, "\n"); + ++part_start; + ptr = part_start; } - handle_part(beginning, part_length, ""); - /* Back off so we can see the next boundary */ - --ptr; - --bytes_processed; } + else { + /** Invalid end of multipart. Bail out! */ + ptr = content_end; + } + } while ( (ptr < content_end) && (next_boundary != NULL) ); + + if (PostMultiPartCallBack != NULL) { + PostMultiPartCallBack("", "", partnum, "", NULL, + content_type, charset, 0, encoding, userdata); } + goto end_parser; } + + /** If it's not a multipart message, then do something with it */ + if (!is_multipart) { + part_start = ptr; + length = 0; + while (ptr < content_end) { + ++ptr; + ++length; + } + part_end = content_end; + /** fix an off-by-one error */ + --part_end; + --length; + + /** Truncate if the header told us to */ + if ( (content_length > 0) && (length > content_length) ) { + length = content_length; + } + + /** + * Sometimes the "name" field is tacked on to Content-type, + * and sometimes it's tacked on to Content-disposition. Use + * whichever one we have. + */ + if (strlen(content_disposition_name) > strlen(content_type_name)) { + name = content_disposition_name; + } + else { + name = content_type_name; + } + + /* + lprintf(9, "mime_decode part=%s, len=%d, type=%s, charset=%s, encoding=%s\n", + partnum, length, content_type, charset, encoding); + */ + + /** + * Ok, we've got a non-multipart part here, so do something with it. + */ + mime_decode(partnum, + part_start, length, + content_type, charset, encoding, disposition, + name, filename, + CallBack, NULL, NULL, + userdata, dont_decode + ); + + /** + * Now if it's an encapsulated message/rfc822 then we have to recurse into it + */ + if (!strcasecmp(content_type, "message/rfc822")) { + + if (PreMultiPartCallBack != NULL) { + PreMultiPartCallBack("", "", partnum, "", + NULL, content_type, charset, + 0, encoding, userdata); + } + if (CallBack != NULL) { + if (strlen(partnum) > 0) { + snprintf(nested_partnum, + sizeof nested_partnum, + "%s.%d", partnum, + ++part_seq); + } + else { + snprintf(nested_partnum, + sizeof nested_partnum, + "%d", ++part_seq); + } + the_mime_parser(nested_partnum, + part_start, part_end, + CallBack, + PreMultiPartCallBack, + PostMultiPartCallBack, + userdata, + dont_decode + ); + } + if (PostMultiPartCallBack != NULL) { + PostMultiPartCallBack("", "", partnum, "", NULL, + content_type, charset, 0, encoding, userdata); + } + + + } + + } + +end_parser: /** free the buffers! end the oppression!! */ + free(boundary); + free(startary); + free(endary); + free(header); + free(content_type); + free(charset); + free(encoding); + free(content_type_name); + free(content_disposition_name); + free(filename); + free(disposition); +} + + + +/** + * \brief Entry point for the MIME parser. + * (This function expects to be fed HEADERS + CONTENT) + * Note: NULL can be supplied as content_end; in this case, the message is + * considered to have ended when the parser encounters a 0x00 byte. + * \param content_start todo ????????? + * \param content_end todo + * \param CallBack todo + * \param PreMultiPartCallBack todo + * \param PostMultiPartCallBack todo + * \param userdata todo + * \param dont_decode todo + */ +void mime_parser(char *content_start, + char *content_end, + + void (*CallBack) + (char *cbname, + char *cbfilename, + char *cbpartnum, + char *cbdisp, + void *cbcontent, + char *cbtype, + char *cbcharset, + size_t cblength, + char *cbencoding, + void *cbuserdata), + + void (*PreMultiPartCallBack) + (char *cbname, + char *cbfilename, + char *cbpartnum, + char *cbdisp, + void *cbcontent, + char *cbtype, + char *cbcharset, + size_t cblength, + char *cbencoding, + void *cbuserdata), + + void (*PostMultiPartCallBack) + (char *cbname, + char *cbfilename, + char *cbpartnum, + char *cbdisp, + void *cbcontent, + char *cbtype, + char *cbcharset, + size_t cblength, + char *cbencoding, + void *cbuserdata), + + void *userdata, + int dont_decode +) +{ + + the_mime_parser("", content_start, content_end, + CallBack, + PreMultiPartCallBack, + PostMultiPartCallBack, + userdata, dont_decode); +} + + + +/*@}*/