X-Git-Url: https://code.citadel.org/?a=blobdiff_plain;f=webcit%2Fhtml2html.c;h=965dc55d69bc57ea9458068f8d19602897b261aa;hb=fb6f6fa4ec4e3277e30d84326d48e6850822d318;hp=8e665f42dff31258abb43ad4b575924c134f484a;hpb=09d54aed185b63303ba5b1585f34f2a1a9688bc8;p=citadel.git diff --git a/webcit/html2html.c b/webcit/html2html.c index 8e665f42d..965dc55d6 100644 --- a/webcit/html2html.c +++ b/webcit/html2html.c @@ -1,20 +1,24 @@ /* - * $Id$ - */ -/** - * \defgroup HTML2HTML Output an HTML message, modifying it slightly to make sure it plays nice + * Output an HTML message, modifying it slightly to make sure it plays nice * with the rest of our web framework. - * \ingroup WebcitHttpServer + * + * Copyright (c) 2005-2012 by the citadel.org team + * + * This program is open source software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 3. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. */ -/*@{*/ + #include "webcit.h" #include "webserver.h" -/** - * \brief Strip surrounding single or double quotes from a string. - * - * \param s String to be stripped. +/* + * Strip surrounding single or double quotes from a string. */ void stripquotes(char *s) { @@ -32,12 +36,12 @@ void stripquotes(char *s) } -/** - * \brief Check to see if a META tag has overridden the declared MIME character set. +/* + * Check to see if a META tag has overridden the declared MIME character set. * - * \param charset Character set name (left unchanged if we don't do anything) - * \param meta_http_equiv Content of the "http-equiv" portion of the META tag - * \param meta_content Content of the "content" portion of the META tag + * charset Character set name (left unchanged if we don't do anything) + * meta_http_equiv Content of the "http-equiv" portion of the META tag + * meta_content Content of the "content" portion of the META tag */ void extract_charset_from_meta(char *charset, char *meta_http_equiv, char *meta_content) { @@ -70,17 +74,19 @@ void extract_charset_from_meta(char *charset, char *meta_http_equiv, char *meta_ strcpy(charset, "UTF-8"); } + /* Remove wandering punctuation */ + if ((ptr=strchr(charset, '\"'))) *ptr = 0; + striplt(charset); } } -/** - * \brief Sanitize and enhance an HTML message for display. - * Also convert weird character sets to UTF-8 if necessary. - * Also fixup img src="cid:..." type inline images to fetch the image +/* + * Sanitize and enhance an HTML message for display. + * Also convert weird character sets to UTF-8 if necessary. + * Also fixup img src="cid:..." type inline images to fetch the image * - * \param supplied_charset the input charset as declared in the MIME headers */ void output_html(const char *supplied_charset, int treat_as_wiki, int msgnum, StrBuf *Source, StrBuf *Target) { char buf[SIZ]; @@ -100,15 +106,15 @@ void output_html(const char *supplied_charset, int treat_as_wiki, int msgnum, St int i; int linklen; char charset[128]; + StrBuf *BodyArea = NULL; #ifdef HAVE_ICONV iconv_t ic = (iconv_t)(-1) ; - char *ibuf; /**< Buffer of characters to be converted */ - char *obuf; /**< Buffer for converted characters */ - size_t ibuflen; /**< Length of input buffer */ - size_t obuflen; /**< Length of output buffer */ - char *osav; /**< Saved pointer to output buffer */ + char *ibuf; /* Buffer of characters to be converted */ + char *obuf; /* Buffer for converted characters */ + size_t ibuflen; /* Length of input buffer */ + size_t obuflen; /* Length of output buffer */ + char *osav; /* Saved pointer to output buffer */ #endif - lprintf(9, "output_html() called with msgnum=%ld\n", msgnum); if (Target == NULL) Target = WC->WBuf; @@ -123,9 +129,9 @@ void output_html(const char *supplied_charset, int treat_as_wiki, int msgnum, St if (ptr == NULL) { StrBufAppendPrintf(Target, ""); StrBufAppendPrintf(Target, _("realloc() error! couldn't get %d bytes: %s"), - buffer_length + 1, - strerror(errno)); - StrBufAppendPrintf(Target, "

\n"); + buffer_length + 1, + strerror(errno)); + StrBufAppendPrintf(Target, "

\n"); while (serv_getln(buf, sizeof buf), strcmp(buf, "000")) { /** flush */ } @@ -158,7 +164,7 @@ void output_html(const char *supplied_charset, int treat_as_wiki, int msgnum, St ++ptr; if ((ptr == NULL) || (ptr >= msgend)) break; - /** + /* * Look for META tags. Some messages (particularly in * Asian locales) illegally declare a message's character * set in the HTML instead of in the MIME headers. This @@ -193,7 +199,7 @@ void output_html(const char *supplied_charset, int treat_as_wiki, int msgnum, St stripquotes(meta_http_equiv); stripquotes(meta_content); extract_charset_from_meta(charset, - meta_http_equiv, meta_content); + meta_http_equiv, meta_content); } free(meta_content); } @@ -203,31 +209,75 @@ void output_html(const char *supplied_charset, int treat_as_wiki, int msgnum, St } } - /** + /* * Any of these tags cause everything up to and including * the tag to be removed. */ if ( (!strncasecmp(ptr, "HTML", 4)) - ||(!strncasecmp(ptr, "HEAD", 4)) - ||(!strncasecmp(ptr, "/HEAD", 5)) - ||(!strncasecmp(ptr, "BODY", 4)) ) { + ||(!strncasecmp(ptr, "HEAD", 4)) + ||(!strncasecmp(ptr, "/HEAD", 5)) + ||(!strncasecmp(ptr, "BODY", 4)) ) { + char *pBody = NULL; + + if (!strncasecmp(ptr, "BODY", 4)) { + pBody = ptr; + } ptr = strchr(ptr, '>'); if ((ptr == NULL) || (ptr >= msgend)) break; + if ((pBody != NULL) && (ptr - pBody > 4)) { + char* src; + char *cid_start, *cid_end; + + *ptr = '\0'; + pBody += 4; + while ((isspace(*pBody)) && (pBody < ptr)) + pBody ++; + BodyArea = NewStrBufPlain(NULL, ptr - pBody); + + if (pBody < ptr) { + src = strstr(pBody, "cid:"); + if (src) { + cid_start = src + 4; + cid_end = cid_start; + while ((*cid_end != '"') && + !isspace(*cid_end) && + (cid_end < ptr)) + cid_end ++; + + /* copy tag and attributes up to src="cid: */ + StrBufAppendBufPlain(BodyArea, pBody, src - pBody, 0); + + /* add in /webcit/mimepart//CID/ + trailing / stops dumb URL filters getting excited */ + StrBufAppendPrintf(BodyArea, + "/webcit/mimepart/%d/",msgnum); + StrBufAppendBufPlain(BodyArea, cid_start, cid_end - cid_start, 0); + + if (ptr - cid_end > 0) + StrBufAppendBufPlain(BodyArea, + cid_end + 1, + ptr - cid_end, 0); + } + else + StrBufAppendBufPlain(BodyArea, pBody, ptr - pBody, 0); + } + *ptr = '>'; + } ++ptr; if ((ptr == NULL) || (ptr >= msgend)) break; msgstart = ptr; } - /** + /* * Any of these tags cause everything including and following * the tag to be removed. */ if ( (!strncasecmp(ptr, "/HTML", 5)) - ||(!strncasecmp(ptr, "/BODY", 5)) ) { + ||(!strncasecmp(ptr, "/BODY", 5)) ) { --ptr; msgend = ptr; strcpy(ptr, ""); - + } ++ptr; @@ -236,21 +286,21 @@ void output_html(const char *supplied_charset, int treat_as_wiki, int msgnum, St strcpy(msg, msgstart); } - /** Now go through the message, parsing tags as necessary. */ + /* Now go through the message, parsing tags as necessary. */ converted_msg = NewStrBufPlain(NULL, content_length + 8192); /** Convert foreign character sets to UTF-8 if necessary. */ #ifdef HAVE_ICONV if ( (strcasecmp(charset, "us-ascii")) - && (strcasecmp(charset, "UTF-8")) - && (strcasecmp(charset, "")) - ) { - lprintf(9, "Converting %s to UTF-8\n", charset); + && (strcasecmp(charset, "UTF-8")) + && (strcasecmp(charset, "")) + ) { + syslog(9, "Converting %s to UTF-8\n", charset); ctdl_iconv_open("UTF-8", charset, &ic); if (ic == (iconv_t)(-1) ) { - lprintf(5, "%s:%d iconv_open() failed: %s\n", - __FILE__, __LINE__, strerror(errno)); + syslog(5, "%s:%d iconv_open() failed: %s\n", + __FILE__, __LINE__, strerror(errno)); } } if (Source == NULL) { @@ -277,10 +327,10 @@ void output_html(const char *supplied_charset, int treat_as_wiki, int msgnum, St msg = (char*)ChrPtr(Source); /* TODO: get rid of this. */ } } - + #endif - /** + /* * At this point, the message has been stripped down to * only the content inside the tags, and has * been converted to UTF-8 if it was originally in a foreign @@ -289,10 +339,15 @@ void output_html(const char *supplied_charset, int treat_as_wiki, int msgnum, St */ if (converted_msg == NULL) { - StrBufAppendPrintf(Target, "Error %d: %s
%s:%d", errno, strerror(errno), __FILE__, __LINE__); + StrBufAppendPrintf(Target, "Error %d: %s
%s:%d", errno, strerror(errno), __FILE__, __LINE__); goto BAIL; } + if (BodyArea != NULL) { + StrBufAppendBufPlain(converted_msg, HKEY("
"), 0); + } ptr = msg; msgend = strchr(msg, 0); while (ptr < msgend) { @@ -317,7 +372,7 @@ void output_html(const char *supplied_charset, int treat_as_wiki, int msgnum, St if (!strncasecmp(ptr, "'))) - ) { + && ((strchr(ptr, '/') < strchr(ptr, '>'))) + ) { /* open external links to new window */ StrBufAppendPrintf(converted_msg, new_window); ptr = &ptr[8]; } - else if ( (treat_as_wiki) && (strncasecmp(ptr, "CurRoom.name, NULL); + StrBufAppendPrintf(converted_msg, "?page="); ptr = &ptr[9]; } else { @@ -345,27 +407,39 @@ void output_html(const char *supplied_charset, int treat_as_wiki, int msgnum, St } /** Fixup '); - char* src=strstr(ptr, " src=\"cid:"); char *cid_start, *cid_end; - ++brak; + char* tag_end=strchr(ptr,'>'); + char* src; + /* FIXME - handle this situation (maybe someone opened an ') { - --brak; - if ((scriptlevel == 0) && (script_start_pos >= 0)) { - StrBufCutRight(converted_msg, StrLength(converted_msg) - script_start_pos); - script_start_pos = (-1); + + if ((ptr >= msg) && (ptr <= msgend)) { + /* + * We need to know when we're inside a tag, + * so we don't turn things that look like URL's into + * links, when they're already links - or image sources. + */ + if ((ptr > msg) && (*(ptr-1) == '<')) { + ++brak; + } + if ((ptr > msg) && (*(ptr-1) == '>')) { + --brak; + if ((scriptlevel == 0) && (script_start_pos >= 0)) { + StrBufCutRight(converted_msg, StrLength(converted_msg) - script_start_pos); + script_start_pos = (-1); + } } + if (!strncasecmp(ptr, "", 3)) --alevel; } - if (!strncasecmp(ptr, "", 3)) --alevel; + } + + if (BodyArea != NULL) { + StrBufAppendBufPlain(converted_msg, HKEY("
"), 0); + FreeStrBuf(&BodyArea); } /** uncomment these two lines to override conversion */ @@ -474,7 +556,7 @@ void output_html(const char *supplied_charset, int treat_as_wiki, int msgnum, St StrBufAppendBuf(Target, converted_msg, 0); BAIL: /** A little trailing vertical whitespace... */ - StrBufAppendPrintf(Target, "

\n"); + StrBufAppendPrintf(Target, "

\n"); /** Now give back the memory */ FreeStrBuf(&converted_msg); @@ -547,6 +629,8 @@ void UrlizeText(StrBuf* Target, StrBuf *Source, StrBuf *WrkBuf) if (TrailerLen > 0) StrBufAppendBufPlain(Target, end, TrailerLen, 0); } + + void url(char *buf, size_t bufsize) { int len, UrlLen, Offset, TrailerLen, outpos; @@ -557,7 +641,7 @@ void url(char *buf, size_t bufsize) start = NULL; len = strlen(buf); if (len > bufsize) { - lprintf(1, "URL: content longer than buffer!"); + syslog(1, "URL: content longer than buffer!"); return; } end = buf + len; @@ -593,7 +677,7 @@ void url(char *buf, size_t bufsize) UrlLen = end - start; if (UrlLen > sizeof(urlbuf)){ - lprintf(1, "URL: content longer than buffer!"); + syslog(1, "URL: content longer than buffer!"); return; } memcpy(urlbuf, start, UrlLen); @@ -606,7 +690,7 @@ void url(char *buf, size_t bufsize) "%ca href=%c%s%c TARGET=%c%s%c%c%s%c/A%c", LB, QU, urlbuf, QU, QU, TARGET, QU, RB, urlbuf, LB, RB); if (outpos >= sizeof(outbuf) - Offset) { - lprintf(1, "URL: content longer than buffer!"); + syslog(1, "URL: content longer than buffer!"); return; } @@ -614,14 +698,10 @@ void url(char *buf, size_t bufsize) if (TrailerLen > 0) memcpy(outbuf + Offset + outpos, end, TrailerLen); if (Offset + outpos + TrailerLen > bufsize) { - lprintf(1, "URL: content longer than buffer!"); + syslog(1, "URL: content longer than buffer!"); return; } memcpy (buf, outbuf, Offset + outpos + TrailerLen); *(buf + Offset + outpos + TrailerLen) = '\0'; } - - - -/*@}*/