X-Git-Url: https://code.citadel.org/?a=blobdiff_plain;f=webcit%2Fhtml2html.c;h=965dc55d69bc57ea9458068f8d19602897b261aa;hb=fb6f6fa4ec4e3277e30d84326d48e6850822d318;hp=c65e674bcd69c649c65d3248fb9dcf66b829129f;hpb=2362c3d4de86f20822ab81015222a196137fd20e;p=citadel.git diff --git a/webcit/html2html.c b/webcit/html2html.c index c65e674bc..965dc55d6 100644 --- a/webcit/html2html.c +++ b/webcit/html2html.c @@ -1,20 +1,24 @@ /* - * $Id$ - */ -/** - * \defgroup HTML2HTML Output an HTML message, modifying it slightly to make sure it plays nice + * Output an HTML message, modifying it slightly to make sure it plays nice * with the rest of our web framework. - * \ingroup WebcitHttpServer + * + * Copyright (c) 2005-2012 by the citadel.org team + * + * This program is open source software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 3. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. */ -/*@{*/ + #include "webcit.h" #include "webserver.h" -/** - * \brief Strip surrounding single or double quotes from a string. - * - * \param s String to be stripped. +/* + * Strip surrounding single or double quotes from a string. */ void stripquotes(char *s) { @@ -32,12 +36,12 @@ void stripquotes(char *s) } -/** - * \brief Check to see if a META tag has overridden the declared MIME character set. +/* + * Check to see if a META tag has overridden the declared MIME character set. * - * \param charset Character set name (left unchanged if we don't do anything) - * \param meta_http_equiv Content of the "http-equiv" portion of the META tag - * \param meta_content Content of the "content" portion of the META tag + * charset Character set name (left unchanged if we don't do anything) + * meta_http_equiv Content of the "http-equiv" portion of the META tag + * meta_content Content of the "content" portion of the META tag */ void extract_charset_from_meta(char *charset, char *meta_http_equiv, char *meta_content) { @@ -70,29 +74,30 @@ void extract_charset_from_meta(char *charset, char *meta_http_equiv, char *meta_ strcpy(charset, "UTF-8"); } + /* Remove wandering punctuation */ + if ((ptr=strchr(charset, '\"'))) *ptr = 0; + striplt(charset); } } -/** - * \brief Sanitize and enhance an HTML message for display. - * Also convert weird character sets to UTF-8 if necessary. +/* + * Sanitize and enhance an HTML message for display. + * Also convert weird character sets to UTF-8 if necessary. + * Also fixup img src="cid:..." type inline images to fetch the image * - * \param supplied_charset the input charset as declared in the MIME headers */ -void output_html(char *supplied_charset, int treat_as_wiki) { +void output_html(const char *supplied_charset, int treat_as_wiki, int msgnum, StrBuf *Source, StrBuf *Target) { char buf[SIZ]; char *msg; char *ptr; char *msgstart; char *msgend; - char *converted_msg; - size_t converted_alloc = 0; + StrBuf *converted_msg; int buffer_length = 1; int line_length = 0; int content_length = 0; - int output_length = 0; char new_window[SIZ]; int brak = 0; int alevel = 0; @@ -101,29 +106,32 @@ void output_html(char *supplied_charset, int treat_as_wiki) { int i; int linklen; char charset[128]; + StrBuf *BodyArea = NULL; #ifdef HAVE_ICONV iconv_t ic = (iconv_t)(-1) ; - char *ibuf; /**< Buffer of characters to be converted */ - char *obuf; /**< Buffer for converted characters */ - size_t ibuflen; /**< Length of input buffer */ - size_t obuflen; /**< Length of output buffer */ - char *osav; /**< Saved pointer to output buffer */ + char *ibuf; /* Buffer of characters to be converted */ + char *obuf; /* Buffer for converted characters */ + size_t ibuflen; /* Length of input buffer */ + size_t obuflen; /* Length of output buffer */ + char *osav; /* Saved pointer to output buffer */ #endif + if (Target == NULL) + Target = WC->WBuf; safestrncpy(charset, supplied_charset, sizeof charset); msg = strdup(""); sprintf(new_window, ""); - wprintf(_("realloc() error! couldn't get %d bytes: %s"), - buffer_length + 1, - strerror(errno)); - wprintf("

\n"); + StrBufAppendPrintf(Target, ""); + StrBufAppendPrintf(Target, _("realloc() error! couldn't get %d bytes: %s"), + buffer_length + 1, + strerror(errno)); + StrBufAppendPrintf(Target, "

\n"); while (serv_getln(buf, sizeof buf), strcmp(buf, "000")) { /** flush */ } @@ -136,6 +144,12 @@ void output_html(char *supplied_charset, int treat_as_wiki) { strcpy(&msg[content_length], "\n"); content_length += 1; } + else { + content_length = StrLength(Source); + free(msg); + msg = (char*) ChrPtr(Source);/* TODO: remove cast */ + buffer_length = content_length; + } /** Do a first pass to isolate the message body */ ptr = msg + 1; @@ -150,7 +164,7 @@ void output_html(char *supplied_charset, int treat_as_wiki) { ++ptr; if ((ptr == NULL) || (ptr >= msgend)) break; - /** + /* * Look for META tags. Some messages (particularly in * Asian locales) illegally declare a message's character * set in the HTML instead of in the MIME headers. This @@ -185,7 +199,7 @@ void output_html(char *supplied_charset, int treat_as_wiki) { stripquotes(meta_http_equiv); stripquotes(meta_content); extract_charset_from_meta(charset, - meta_http_equiv, meta_content); + meta_http_equiv, meta_content); } free(meta_content); } @@ -195,31 +209,75 @@ void output_html(char *supplied_charset, int treat_as_wiki) { } } - /** + /* * Any of these tags cause everything up to and including * the tag to be removed. */ if ( (!strncasecmp(ptr, "HTML", 4)) - ||(!strncasecmp(ptr, "HEAD", 4)) - ||(!strncasecmp(ptr, "/HEAD", 5)) - ||(!strncasecmp(ptr, "BODY", 4)) ) { + ||(!strncasecmp(ptr, "HEAD", 4)) + ||(!strncasecmp(ptr, "/HEAD", 5)) + ||(!strncasecmp(ptr, "BODY", 4)) ) { + char *pBody = NULL; + + if (!strncasecmp(ptr, "BODY", 4)) { + pBody = ptr; + } ptr = strchr(ptr, '>'); if ((ptr == NULL) || (ptr >= msgend)) break; + if ((pBody != NULL) && (ptr - pBody > 4)) { + char* src; + char *cid_start, *cid_end; + + *ptr = '\0'; + pBody += 4; + while ((isspace(*pBody)) && (pBody < ptr)) + pBody ++; + BodyArea = NewStrBufPlain(NULL, ptr - pBody); + + if (pBody < ptr) { + src = strstr(pBody, "cid:"); + if (src) { + cid_start = src + 4; + cid_end = cid_start; + while ((*cid_end != '"') && + !isspace(*cid_end) && + (cid_end < ptr)) + cid_end ++; + + /* copy tag and attributes up to src="cid: */ + StrBufAppendBufPlain(BodyArea, pBody, src - pBody, 0); + + /* add in /webcit/mimepart//CID/ + trailing / stops dumb URL filters getting excited */ + StrBufAppendPrintf(BodyArea, + "/webcit/mimepart/%d/",msgnum); + StrBufAppendBufPlain(BodyArea, cid_start, cid_end - cid_start, 0); + + if (ptr - cid_end > 0) + StrBufAppendBufPlain(BodyArea, + cid_end + 1, + ptr - cid_end, 0); + } + else + StrBufAppendBufPlain(BodyArea, pBody, ptr - pBody, 0); + } + *ptr = '>'; + } ++ptr; if ((ptr == NULL) || (ptr >= msgend)) break; msgstart = ptr; } - /** + /* * Any of these tags cause everything including and following * the tag to be removed. */ if ( (!strncasecmp(ptr, "/HTML", 5)) - ||(!strncasecmp(ptr, "/BODY", 5)) ) { + ||(!strncasecmp(ptr, "/BODY", 5)) ) { --ptr; msgend = ptr; strcpy(ptr, ""); - + } ++ptr; @@ -228,35 +286,51 @@ void output_html(char *supplied_charset, int treat_as_wiki) { strcpy(msg, msgstart); } + /* Now go through the message, parsing tags as necessary. */ + converted_msg = NewStrBufPlain(NULL, content_length + 8192); + + /** Convert foreign character sets to UTF-8 if necessary. */ #ifdef HAVE_ICONV if ( (strcasecmp(charset, "us-ascii")) - && (strcasecmp(charset, "UTF-8")) - && (strcasecmp(charset, "")) - ) { - lprintf(9, "Converting %s to UTF-8\n", charset); - ic = ctdl_iconv_open("UTF-8", charset); + && (strcasecmp(charset, "UTF-8")) + && (strcasecmp(charset, "")) + ) { + syslog(9, "Converting %s to UTF-8\n", charset); + ctdl_iconv_open("UTF-8", charset, &ic); if (ic == (iconv_t)(-1) ) { - lprintf(5, "%s:%d iconv_open() failed: %s\n", - __FILE__, __LINE__, strerror(errno)); + syslog(5, "%s:%d iconv_open() failed: %s\n", + __FILE__, __LINE__, strerror(errno)); } } - if (ic != (iconv_t)(-1) ) { - ibuf = msg; - ibuflen = content_length; - obuflen = content_length + (content_length / 2) ; - obuf = (char *) malloc(obuflen); - osav = obuf; - iconv(ic, &ibuf, &ibuflen, &obuf, &obuflen); - content_length = content_length + (content_length / 2) - obuflen; - osav[content_length] = 0; - free(msg); - msg = osav; - iconv_close(ic); + if (Source == NULL) { + if (ic != (iconv_t)(-1) ) { + ibuf = msg; + ibuflen = content_length; + obuflen = content_length + (content_length / 2) ; + obuf = (char *) malloc(obuflen); + osav = obuf; + iconv(ic, &ibuf, &ibuflen, &obuf, &obuflen); + content_length = content_length + (content_length / 2) - obuflen; + osav[content_length] = 0; + free(msg); + msg = osav; + iconv_close(ic); + } } + else { + if (ic != (iconv_t)(-1) ) { + StrBuf *Buf = NewStrBufPlain(NULL, StrLength(Source) + 8096);; + StrBufConvert(Source, Buf, &ic); + FreeStrBuf(&Buf); + iconv_close(ic); + msg = (char*)ChrPtr(Source); /* TODO: get rid of this. */ + } + } + #endif - /** + /* * At this point, the message has been stripped down to * only the content inside the tags, and has * been converted to UTF-8 if it was originally in a foreign @@ -264,15 +338,16 @@ void output_html(char *supplied_charset, int treat_as_wiki) { * terminated now. */ - /** Now go through the message, parsing tags as necessary. */ - converted_alloc = content_length + 8192; - converted_msg = malloc(converted_alloc); if (converted_msg == NULL) { - wprintf("Error %d: %s
%s:%d", errno, strerror(errno), __FILE__, __LINE__); + StrBufAppendPrintf(Target, "Error %d: %s
%s:%d", errno, strerror(errno), __FILE__, __LINE__); goto BAIL; } - strcpy(converted_msg, ""); + if (BodyArea != NULL) { + StrBufAppendBufPlain(converted_msg, HKEY("
"), 0); + } ptr = msg; msgend = strchr(msg, 0); while (ptr < msgend) { @@ -280,7 +355,7 @@ void output_html(char *supplied_charset, int treat_as_wiki) { /** Try to sanitize the html of any rogue scripts */ if (!strncasecmp(ptr, "= converted_alloc) { - converted_alloc += 8192; - converted_msg = realloc(converted_msg, converted_alloc); - if (converted_msg == NULL) { - abort(); - } - } - sprintf(&converted_msg[output_length], - "'))) - ) { + && ((strchr(ptr, '/') < strchr(ptr, '>'))) + ) { /* open external links to new window */ - content_length += 64; - if (content_length >= converted_alloc) { - converted_alloc += 8192; - converted_msg = realloc(converted_msg, converted_alloc); - if (converted_msg == NULL) { - abort(); - } - } - sprintf(&converted_msg[output_length], new_window); - output_length += strlen(new_window); + StrBufAppendPrintf(converted_msg, new_window); ptr = &ptr[8]; } - else if ( (treat_as_wiki) && (strncasecmp(ptr, "= converted_alloc) { - converted_alloc += 8192; - converted_msg = realloc(converted_msg, converted_alloc); - if (converted_msg == NULL) { - abort(); - } - } - sprintf(&converted_msg[output_length], "CurRoom.name, NULL); + StrBufAppendPrintf(converted_msg, "?page="); ptr = &ptr[9]; } else { - sprintf(&converted_msg[output_length], "'); + char* src; + /* FIXME - handle this situation (maybe someone opened an = converted_alloc) { - converted_alloc += 8192; - converted_msg = realloc(converted_msg, converted_alloc); - if (converted_msg == NULL) { - abort(); - } - } - sprintf(&converted_msg[output_length], new_window); - output_length += strlen(new_window); - converted_msg[output_length] = '\"'; - converted_msg[++output_length] = 0; - for (i=0; i"); - output_length += 2; - for (i=0; i"); - output_length += 4; + StrBufAppendPrintf(converted_msg, "%s\"", new_window); + StrBufAppendBufPlain(converted_msg, ptr, linklen, 0); + StrBufAppendPrintf(converted_msg, "\">"); + StrBufAppendBufPlain(converted_msg, ptr, linklen, 0); + ptr += linklen; + StrBufAppendPrintf(converted_msg, ""); } } else { - converted_msg[output_length] = *ptr++; - converted_msg[++output_length] = 0; + StrBufAppendBufPlain(converted_msg, ptr, 1, 0); + ptr++; } - /** - * We need to know when we're inside a tag, - * so we don't turn things that look like URL's into - * links, when they're already links - or image sources. - */ - if (*(ptr-1) == '<') { - ++brak; - } - if (*(ptr-1) == '>') { - --brak; - if ((scriptlevel == 0) && (script_start_pos >= 0)) { - output_length = script_start_pos; - converted_msg[output_length] = 0; - script_start_pos = (-1); + + if ((ptr >= msg) && (ptr <= msgend)) { + /* + * We need to know when we're inside a tag, + * so we don't turn things that look like URL's into + * links, when they're already links - or image sources. + */ + if ((ptr > msg) && (*(ptr-1) == '<')) { + ++brak; } + if ((ptr > msg) && (*(ptr-1) == '>')) { + --brak; + if ((scriptlevel == 0) && (script_start_pos >= 0)) { + StrBufCutRight(converted_msg, StrLength(converted_msg) - script_start_pos); + script_start_pos = (-1); + } + } + if (!strncasecmp(ptr, "", 3)) --alevel; } - if (!strncasecmp(ptr, "", 3)) --alevel; + } + + if (BodyArea != NULL) { + StrBufAppendBufPlain(converted_msg, HKEY("
"), 0); + FreeStrBuf(&BodyArea); } /** uncomment these two lines to override conversion */ @@ -467,14 +553,155 @@ void output_html(char *supplied_charset, int treat_as_wiki) { /** output_length = content_length; */ /** Output our big pile of markup */ - client_write(converted_msg, output_length); + StrBufAppendBuf(Target, converted_msg, 0); BAIL: /** A little trailing vertical whitespace... */ - wprintf("

\n"); + StrBufAppendPrintf(Target, "

\n"); /** Now give back the memory */ - if (converted_msg != NULL) free(converted_msg); - if (msg != NULL) free(msg); + FreeStrBuf(&converted_msg); + if ((msg != NULL) && (Source == NULL)) free(msg); +} + + + + + + +/* + * Look for URL's embedded in a buffer and make them linkable. We use a + * target window in order to keep the Citadel session in its own window. + */ +void UrlizeText(StrBuf* Target, StrBuf *Source, StrBuf *WrkBuf) +{ + int len, UrlLen, Offset, TrailerLen; + const char *start, *end, *pos; + + FlushStrBuf(Target); + + start = NULL; + len = StrLength(Source); + end = ChrPtr(Source) + len; + for (pos = ChrPtr(Source); (pos < end) && (start == NULL); ++pos) { + if (!strncasecmp(pos, "http://", 7)) + start = pos; + else if (!strncasecmp(pos, "ftp://", 6)) + start = pos; + } + + if (start == NULL) { + StrBufAppendBuf(Target, Source, 0); + return; + } + FlushStrBuf(WrkBuf); + + for (pos = ChrPtr(Source) + len; pos > start; --pos) { + if ( (!isprint(*pos)) + || (isspace(*pos)) + || (*pos == '{') + || (*pos == '}') + || (*pos == '|') + || (*pos == '\\') + || (*pos == '^') + || (*pos == '[') + || (*pos == ']') + || (*pos == '`') + || (*pos == '<') + || (*pos == '>') + || (*pos == '(') + || (*pos == ')') + ) { + end = pos; + } + } + + UrlLen = end - start; + StrBufAppendBufPlain(WrkBuf, start, UrlLen, 0); + + Offset = start - ChrPtr(Source); + if (Offset != 0) + StrBufAppendBufPlain(Target, ChrPtr(Source), Offset, 0); + StrBufAppendPrintf(Target, "%ca href=%c%s%c TARGET=%c%s%c%c%s%c/A%c", + LB, QU, ChrPtr(WrkBuf), QU, QU, TARGET, + QU, RB, ChrPtr(WrkBuf), LB, RB); + + TrailerLen = StrLength(Source) - (end - ChrPtr(Source)); + if (TrailerLen > 0) + StrBufAppendBufPlain(Target, end, TrailerLen, 0); +} + + +void url(char *buf, size_t bufsize) +{ + int len, UrlLen, Offset, TrailerLen, outpos; + char *start, *end, *pos; + char urlbuf[SIZ]; + char outbuf[SIZ]; + + start = NULL; + len = strlen(buf); + if (len > bufsize) { + syslog(1, "URL: content longer than buffer!"); + return; + } + end = buf + len; + for (pos = buf; (pos < end) && (start == NULL); ++pos) { + if (!strncasecmp(pos, "http://", 7)) + start = pos; + if (!strncasecmp(pos, "ftp://", 6)) + start = pos; + } + + if (start == NULL) + return; + + for (pos = buf+len; pos > start; --pos) { + if ( (!isprint(*pos)) + || (isspace(*pos)) + || (*pos == '{') + || (*pos == '}') + || (*pos == '|') + || (*pos == '\\') + || (*pos == '^') + || (*pos == '[') + || (*pos == ']') + || (*pos == '`') + || (*pos == '<') + || (*pos == '>') + || (*pos == '(') + || (*pos == ')') + ) { + end = pos; + } + } + + UrlLen = end - start; + if (UrlLen > sizeof(urlbuf)){ + syslog(1, "URL: content longer than buffer!"); + return; + } + memcpy(urlbuf, start, UrlLen); + urlbuf[UrlLen] = '\0'; + + Offset = start - buf; + if ((Offset != 0) && (Offset < sizeof(outbuf))) + memcpy(outbuf, buf, Offset); + outpos = snprintf(&outbuf[Offset], sizeof(outbuf) - Offset, + "%ca href=%c%s%c TARGET=%c%s%c%c%s%c/A%c", + LB, QU, urlbuf, QU, QU, TARGET, QU, RB, urlbuf, LB, RB); + if (outpos >= sizeof(outbuf) - Offset) { + syslog(1, "URL: content longer than buffer!"); + return; + } + + TrailerLen = len - (end - start); + if (TrailerLen > 0) + memcpy(outbuf + Offset + outpos, end, TrailerLen); + if (Offset + outpos + TrailerLen > bufsize) { + syslog(1, "URL: content longer than buffer!"); + return; + } + memcpy (buf, outbuf, Offset + outpos + TrailerLen); + *(buf + Offset + outpos + TrailerLen) = '\0'; } -/*@}*/