X-Git-Url: https://code.citadel.org/?a=blobdiff_plain;f=webcit%2Fhtml2html.c;h=9d6e357aaabfdf71d577063f3c5ffae85670ec14;hb=719abc348ee76436439a16bcaccce317c12d3435;hp=abd5904e846a1577fa6c2af0c5a2d3fa3e72eed6;hpb=66bc2dac8a06eac6d8fe590d1ccc867090274579;p=citadel.git diff --git a/webcit/html2html.c b/webcit/html2html.c index abd5904e8..9d6e357aa 100644 --- a/webcit/html2html.c +++ b/webcit/html2html.c @@ -8,7 +8,6 @@ */ /*@{*/ #include "webcit.h" -#include "vcard.h" #include "webserver.h" @@ -59,6 +58,18 @@ void extract_charset_from_meta(char *charset, char *meta_http_equiv, char *meta_ striplt(buf); if (!strncasecmp(buf, "charset=", 8)) { strcpy(charset, &buf[8]); + + /* + * The brain-damaged webmail program in Microsoft Exchange declares + * a charset of "unicode" when they really mean "UTF-8". GNU iconv + * treats "unicode" as an alias for "UTF-16" so we have to manually + * fix this here, otherwise messages generated in Exchange webmail + * show up as a big pile of weird characters. + */ + if (!strcasecmp(charset, "unicode")) { + strcpy(charset, "UTF-8"); + } + } } @@ -67,24 +78,25 @@ void extract_charset_from_meta(char *charset, char *meta_http_equiv, char *meta_ /** * \brief Sanitize and enhance an HTML message for display. * Also convert weird character sets to UTF-8 if necessary. + * Also fixup img src="cid:..." type inline images to fetch the image * * \param supplied_charset the input charset as declared in the MIME headers */ -void output_html(char *supplied_charset, int treat_as_wiki) { +void output_html(char *supplied_charset, int treat_as_wiki, int msgnum) { char buf[SIZ]; char *msg; char *ptr; char *msgstart; char *msgend; - char *converted_msg; - size_t converted_alloc = 0; + StrBuf *converted_msg; int buffer_length = 1; int line_length = 0; int content_length = 0; - int output_length = 0; char new_window[SIZ]; int brak = 0; int alevel = 0; + int scriptlevel = 0; + int script_start_pos = (-1); int i; int linklen; char charset[128]; @@ -125,7 +137,7 @@ void output_html(char *supplied_charset, int treat_as_wiki) { } /** Do a first pass to isolate the message body */ - ptr = msg; + ptr = msg + 1; msgstart = msg; msgend = &msg[content_length]; @@ -252,16 +264,27 @@ void output_html(char *supplied_charset, int treat_as_wiki) { */ /** Now go through the message, parsing tags as necessary. */ - converted_alloc = content_length + 8192; - converted_msg = malloc(converted_alloc); + converted_msg = NewStrBufPlain(NULL, content_length + 8192); if (converted_msg == NULL) { - abort(); /* FIXME */ + wprintf("Error %d: %s
%s:%d", errno, strerror(errno), __FILE__, __LINE__); + goto BAIL; } - strcpy(converted_msg, ""); + ptr = msg; msgend = strchr(msg, 0); while (ptr < msgend) { + /** Try to sanitize the html of any rogue scripts */ + if (!strncasecmp(ptr, "= converted_alloc) { - converted_alloc += 8192; - converted_msg = realloc(converted_msg, converted_alloc); - if (converted_msg == NULL) { - abort(); - } - } - sprintf(&converted_msg[output_length], - "'))) ) { /* open external links to new window */ - content_length += 64; - if (content_length >= converted_alloc) { - converted_alloc += 8192; - converted_msg = realloc(converted_msg, converted_alloc); - if (converted_msg == NULL) { - abort(); - } - } - sprintf(&converted_msg[output_length], new_window); - output_length += strlen(new_window); + StrBufAppendPrintf(converted_msg, new_window); ptr = &ptr[8]; } else if ( (treat_as_wiki) && (strncasecmp(ptr, "= converted_alloc) { - converted_alloc += 8192; - converted_msg = realloc(converted_msg, converted_alloc); - if (converted_msg == NULL) { - abort(); - } - } - sprintf(&converted_msg[output_length], "'); + char* src=strstr(ptr, " src=\"cid:"); + char *cid_start, *cid_end; + ++brak; + + if (src && + (cid_start=strchr(src,':')) && + (cid_end=strchr(cid_start,'"')) && + (cid_end < tag_end)) { + + /* copy tag and attributes up to src="cid: */ + StrBufAppendBufPlain(converted_msg, ptr, src - ptr, 0); + cid_start++; + + /* add in /webcit/mimepart//CID/ + trailing / stops dumb URL filters getting excited */ + StrBufAppendPrintf(converted_msg, + "src=\"/webcit/mimepart/%d/",msgnum); + StrBufAppendBufPlain(converted_msg, cid_start, cid_end - cid_start, 0); + StrBufAppendBufPlain(converted_msg, "/\"", -1, 0); + + ptr = cid_end+1; + } + StrBufAppendBufPlain(converted_msg, ptr, tag_end - ptr, 0); + ptr = tag_end; + } + /** * Turn anything that looks like a URL into a real link, as long * as it's not inside a tag already */ else if ( (brak == 0) && (alevel == 0) && (!strncasecmp(ptr, "http://", 7))) { - linklen = 0; /** Find the end of the link */ - for (i=0; i<=strlen(ptr); ++i) { + int strlenptr; + linklen = 0; + + strlenptr = strlen(ptr); + for (i=0; i<=strlenptr; ++i) { if ((ptr[i]==0) ||(isspace(ptr[i])) ||(ptr[i]==10) @@ -341,48 +371,76 @@ void output_html(char *supplied_charset, int treat_as_wiki) { ||(ptr[i]=='>') ||(ptr[i]=='[') ||(ptr[i]==']') + ||(ptr[i]=='"') + ||(ptr[i]=='\'') ) linklen = i; + /* did s.b. send us an entity? */ + if (ptr[i] == '&') { + if ((ptr[i+2] ==';') || + (ptr[i+3] ==';') || + (ptr[i+5] ==';') || + (ptr[i+6] ==';') || + (ptr[i+7] ==';')) + linklen = i; + } if (linklen > 0) break; } if (linklen > 0) { - content_length += (32 + linklen); - if (content_length >= converted_alloc) { - converted_alloc += 8192; - converted_msg = realloc(converted_msg, converted_alloc); - if (converted_msg == NULL) { - abort(); - } + char *ltreviewptr; + char *nbspreviewptr; + char linkedchar; + int len = linklen; + + len = linklen; + linkedchar = ptr[len]; + ptr[len] = '\0'; + /* spot for some subject strings tinymce tends to give us. */ + ltreviewptr = strchr(ptr, '<'); + if (ltreviewptr != NULL) { + *ltreviewptr = '\0'; + linklen = ltreviewptr - ptr; } - sprintf(&converted_msg[output_length], new_window); - output_length += strlen(new_window); - converted_msg[output_length] = '\"'; - converted_msg[++output_length] = 0; - for (i=0; i"); - output_length += 2; - for (i=0; i"); - output_length += 4; + if (ltreviewptr != 0) + *ltreviewptr = '<'; + + ptr[len] = linkedchar; + + content_length += (32 + linklen); + StrBufAppendPrintf(converted_msg, "%s\"", new_window); + StrBufAppendBufPlain(converted_msg, ptr, linklen, 0); + StrBufAppendPrintf(converted_msg, "\">"); + StrBufAppendBufPlain(converted_msg, ptr, linklen, 0); + ptr += linklen; + StrBufAppendPrintf(converted_msg, ""); } } else { - /** - * We need to know when we're inside a tag, - * so we don't turn things that look like URL's into - * links, when they're already links - or image sources. - */ - if (*ptr == '<') ++brak; - if (*ptr == '>') --brak; - if (!strncasecmp(ptr, "", 3)) --alevel; - converted_msg[output_length] = *ptr++; - converted_msg[++output_length] = 0; + StrBufAppendBufPlain(converted_msg, ptr, 1, 0); + ptr++; + } + + /** + * We need to know when we're inside a tag, + * so we don't turn things that look like URL's into + * links, when they're already links - or image sources. + */ + if (*(ptr-1) == '<') { + ++brak; + } + if (*(ptr-1) == '>') { + --brak; + if ((scriptlevel == 0) && (script_start_pos >= 0)) { + StrBufCutRight(converted_msg, StrLength(converted_msg) - script_start_pos); + script_start_pos = (-1); + } } + if (!strncasecmp(ptr, "", 3)) --alevel; } /** uncomment these two lines to override conversion */ @@ -390,13 +448,13 @@ void output_html(char *supplied_charset, int treat_as_wiki) { /** output_length = content_length; */ /** Output our big pile of markup */ - client_write(converted_msg, output_length); + StrBufAppendBuf(WC->WBuf, converted_msg, 0); - /** A little trailing vertical whitespace... */ +BAIL: /** A little trailing vertical whitespace... */ wprintf("

\n"); /** Now give back the memory */ - if (converted_msg != NULL) free(converted_msg); + FreeStrBuf(&converted_msg); if (msg != NULL) free(msg); }