X-Git-Url: https://code.citadel.org/?a=blobdiff_plain;f=webcit%2Fhtml2html.c;h=cab626d3ee48e618ddb05f097553a43f26e36be6;hb=8404d8b57a7de85367732cf0e710c6f81a8f590b;hp=72356c700bf539566f5baee2cd92b3c74e7372a9;hpb=8913a797d8e44247d9bab88fa68c002cbb639e92;p=citadel.git diff --git a/webcit/html2html.c b/webcit/html2html.c index 72356c700..cab626d3e 100644 --- a/webcit/html2html.c +++ b/webcit/html2html.c @@ -8,7 +8,6 @@ */ /*@{*/ #include "webcit.h" -#include "vcard.h" #include "webserver.h" @@ -79,24 +78,25 @@ void extract_charset_from_meta(char *charset, char *meta_http_equiv, char *meta_ /** * \brief Sanitize and enhance an HTML message for display. * Also convert weird character sets to UTF-8 if necessary. + * Also fixup img src="cid:..." type inline images to fetch the image * * \param supplied_charset the input charset as declared in the MIME headers */ -void output_html(char *supplied_charset, int treat_as_wiki) { +void output_html(const char *supplied_charset, int treat_as_wiki, int msgnum, StrBuf *Source, StrBuf *Target) { char buf[SIZ]; char *msg; char *ptr; char *msgstart; char *msgend; - char *converted_msg; - size_t converted_alloc = 0; + StrBuf *converted_msg; int buffer_length = 1; int line_length = 0; int content_length = 0; - int output_length = 0; char new_window[SIZ]; int brak = 0; int alevel = 0; + int scriptlevel = 0; + int script_start_pos = (-1); int i; int linklen; char charset[128]; @@ -108,21 +108,23 @@ void output_html(char *supplied_charset, int treat_as_wiki) { size_t obuflen; /**< Length of output buffer */ char *osav; /**< Saved pointer to output buffer */ #endif + if (Target == NULL) + Target = WC->WBuf; safestrncpy(charset, supplied_charset, sizeof charset); msg = strdup(""); sprintf(new_window, ""); - wprintf(_("realloc() error! couldn't get %d bytes: %s"), + StrBufAppendPrintf(Target, ""); + StrBufAppendPrintf(Target, _("realloc() error! couldn't get %d bytes: %s"), buffer_length + 1, strerror(errno)); - wprintf("

\n"); + StrBufAppendPrintf(Target, "

\n"); while (serv_getln(buf, sizeof buf), strcmp(buf, "000")) { /** flush */ } @@ -135,9 +137,15 @@ void output_html(char *supplied_charset, int treat_as_wiki) { strcpy(&msg[content_length], "\n"); content_length += 1; } + else { + content_length = StrLength(Source); + free(msg); + msg = (char*) ChrPtr(Source);/* TODO: remove cast */ + buffer_length = content_length; + } /** Do a first pass to isolate the message body */ - ptr = msg; + ptr = msg + 1; msgstart = msg; msgend = &msg[content_length]; @@ -227,6 +235,10 @@ void output_html(char *supplied_charset, int treat_as_wiki) { strcpy(msg, msgstart); } + /** Now go through the message, parsing tags as necessary. */ + converted_msg = NewStrBufPlain(NULL, content_length + 8192); + + /** Convert foreign character sets to UTF-8 if necessary. */ #ifdef HAVE_ICONV if ( (strcasecmp(charset, "us-ascii")) @@ -234,25 +246,36 @@ void output_html(char *supplied_charset, int treat_as_wiki) { && (strcasecmp(charset, "")) ) { lprintf(9, "Converting %s to UTF-8\n", charset); - ic = ctdl_iconv_open("UTF-8", charset); + ctdl_iconv_open("UTF-8", charset, &ic); if (ic == (iconv_t)(-1) ) { lprintf(5, "%s:%d iconv_open() failed: %s\n", __FILE__, __LINE__, strerror(errno)); } } - if (ic != (iconv_t)(-1) ) { - ibuf = msg; - ibuflen = content_length; - obuflen = content_length + (content_length / 2) ; - obuf = (char *) malloc(obuflen); - osav = obuf; - iconv(ic, &ibuf, &ibuflen, &obuf, &obuflen); - content_length = content_length + (content_length / 2) - obuflen; - osav[content_length] = 0; - free(msg); - msg = osav; - iconv_close(ic); + if (Source == NULL) { + if (ic != (iconv_t)(-1) ) { + ibuf = msg; + ibuflen = content_length; + obuflen = content_length + (content_length / 2) ; + obuf = (char *) malloc(obuflen); + osav = obuf; + iconv(ic, &ibuf, &ibuflen, &obuf, &obuflen); + content_length = content_length + (content_length / 2) - obuflen; + osav[content_length] = 0; + free(msg); + msg = osav; + iconv_close(ic); + } + } + else { + if (ic != (iconv_t)(-1) ) { + StrBuf *Buf = NewStrBufPlain(NULL, StrLength(Source) + 8096);; + StrBufConvert(Source, Buf, &ic); + FreeStrBuf(&Buf); + iconv_close(ic); + } } + #endif /** @@ -263,19 +286,26 @@ void output_html(char *supplied_charset, int treat_as_wiki) { * terminated now. */ - /** Now go through the message, parsing tags as necessary. */ - converted_alloc = content_length + 8192; - converted_msg = malloc(converted_alloc); if (converted_msg == NULL) { - wprintf("Error %d: %s
%s:%s", errno, strerror(errno), __FILE__, __LINE__); + StrBufAppendPrintf(Target, "Error %d: %s
%s:%d", errno, strerror(errno), __FILE__, __LINE__); goto BAIL; } - strcpy(converted_msg, ""); ptr = msg; msgend = strchr(msg, 0); while (ptr < msgend) { + /** Try to sanitize the html of any rogue scripts */ + if (!strncasecmp(ptr, "= converted_alloc) { - converted_alloc += 8192; - converted_msg = realloc(converted_msg, converted_alloc); - if (converted_msg == NULL) { - abort(); - } - } - sprintf(&converted_msg[output_length], - "
'))) ) { /* open external links to new window */ - content_length += 64; - if (content_length >= converted_alloc) { - converted_alloc += 8192; - converted_msg = realloc(converted_msg, converted_alloc); - if (converted_msg == NULL) { - abort(); - } - } - sprintf(&converted_msg[output_length], new_window); - output_length += strlen(new_window); + StrBufAppendPrintf(converted_msg, new_window); ptr = &ptr[8]; } else if ( (treat_as_wiki) && (strncasecmp(ptr, "= converted_alloc) { - converted_alloc += 8192; - converted_msg = realloc(converted_msg, converted_alloc); - if (converted_msg == NULL) { - abort(); - } - } - sprintf(&converted_msg[output_length], "'); + char* src=strstr(ptr, " src=\"cid:"); + char *cid_start, *cid_end; + ++brak; + + if (src && + (cid_start=strchr(src,':')) && + (cid_end=strchr(cid_start,'"')) && + (cid_end < tag_end)) { + + /* copy tag and attributes up to src="cid: */ + StrBufAppendBufPlain(converted_msg, ptr, src - ptr, 0); + cid_start++; + + /* add in /webcit/mimepart//CID/ + trailing / stops dumb URL filters getting excited */ + StrBufAppendPrintf(converted_msg, + "src=\"/webcit/mimepart/%d/",msgnum); + StrBufAppendBufPlain(converted_msg, cid_start, cid_end - cid_start, 0); + StrBufAppendBufPlain(converted_msg, "/\"", -1, 0); + + ptr = cid_end+1; + } + StrBufAppendBufPlain(converted_msg, ptr, tag_end - ptr, 0); + ptr = tag_end; + } + /** * Turn anything that looks like a URL into a real link, as long * as it's not inside a tag already */ else if ( (brak == 0) && (alevel == 0) && (!strncasecmp(ptr, "http://", 7))) { - linklen = 0; /** Find the end of the link */ - for (i=0; i<=strlen(ptr); ++i) { + int strlenptr; + linklen = 0; + + strlenptr = strlen(ptr); + for (i=0; i<=strlenptr; ++i) { if ((ptr[i]==0) ||(isspace(ptr[i])) ||(ptr[i]==10) @@ -355,47 +392,75 @@ void output_html(char *supplied_charset, int treat_as_wiki) { ||(ptr[i]=='>') ||(ptr[i]=='[') ||(ptr[i]==']') + ||(ptr[i]=='"') + ||(ptr[i]=='\'') ) linklen = i; + /* did s.b. send us an entity? */ + if (ptr[i] == '&') { + if ((ptr[i+2] ==';') || + (ptr[i+3] ==';') || + (ptr[i+5] ==';') || + (ptr[i+6] ==';') || + (ptr[i+7] ==';')) + linklen = i; + } if (linklen > 0) break; } if (linklen > 0) { - content_length += (32 + linklen); - if (content_length >= converted_alloc) { - converted_alloc += 8192; - converted_msg = realloc(converted_msg, converted_alloc); - if (converted_msg == NULL) { - abort(); - } - } - sprintf(&converted_msg[output_length], new_window); - output_length += strlen(new_window); - converted_msg[output_length] = '\"'; - converted_msg[++output_length] = 0; - for (i=0; i"); - output_length += 2; - for (i=0; i"); - output_length += 4; + if (ltreviewptr != 0) + *ltreviewptr = '<'; + + ptr[len] = linkedchar; + + content_length += (32 + linklen); + StrBufAppendPrintf(converted_msg, "%s\"", new_window); + StrBufAppendBufPlain(converted_msg, ptr, linklen, 0); + StrBufAppendPrintf(converted_msg, "\">"); + StrBufAppendBufPlain(converted_msg, ptr, linklen, 0); + ptr += linklen; + StrBufAppendPrintf(converted_msg, ""); } } else { - converted_msg[output_length] = *ptr++; - converted_msg[++output_length] = 0; + StrBufAppendBufPlain(converted_msg, ptr, 1, 0); + ptr++; } + /** * We need to know when we're inside a tag, * so we don't turn things that look like URL's into * links, when they're already links - or image sources. */ - if (*ptr == '<') ++brak; - if (*ptr == '>') --brak; + if (*(ptr-1) == '<') { + ++brak; + } + if (*(ptr-1) == '>') { + --brak; + if ((scriptlevel == 0) && (script_start_pos >= 0)) { + StrBufCutRight(converted_msg, StrLength(converted_msg) - script_start_pos); + script_start_pos = (-1); + } + } if (!strncasecmp(ptr, "", 3)) --alevel; } @@ -404,14 +469,14 @@ void output_html(char *supplied_charset, int treat_as_wiki) { /** output_length = content_length; */ /** Output our big pile of markup */ - client_write(converted_msg, output_length); + StrBufAppendBuf(Target, converted_msg, 0); BAIL: /** A little trailing vertical whitespace... */ - wprintf("

\n"); + StrBufAppendPrintf(Target, "

\n"); /** Now give back the memory */ - if (converted_msg != NULL) free(converted_msg); - if (msg != NULL) free(msg); + FreeStrBuf(&converted_msg); + if ((msg != NULL) && (Source == NULL)) free(msg); } /*@}*/