2 * Output an HTML message, modifying it slightly to make sure it plays nice
3 * with the rest of our web framework.
5 * Copyright (c) 2005-2010 by the citadel.org team
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 3 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 #include "webserver.h"
27 * Strip surrounding single or double quotes from a string.
/*
 * stripquotes() - per the description preceding the signature, removes one
 * pair of surrounding quotes from s.
 *
 * s   string to examine; presumably edited in place (TODO confirm - the
 *     statements that do the removal are not visible in this listing)
 *
 * The visible test fires only when the first and last characters are the
 * same kind of quote (both double or both single); a mismatched pair is
 * left alone.
 *
 * NOTE(review): the test reads s[len-1], so it depends on len having been
 * set to at least 1 beforehand - confirm a NULL/length guard precedes it.
 */
29 void stripquotes(char *s)
38 if ( ( (s[0] == '\"') && (s[len-1] == '\"') ) || ( (s[0] == '\'') && (s[len-1] == '\'') ) ) {
46 * Check to see if a META tag has overridden the declared MIME character set.
48 * charset Character set name (left unchanged if we don't do anything)
49 * meta_http_equiv Content of the "http-equiv" portion of the META tag
50 * meta_content Content of the "content" portion of the META tag
/*
 * extract_charset_from_meta() - let a META http-equiv tag override the
 * character set declared for the message.
 *
 * charset          in/out: current charset name; overwritten only when the
 *                  META content carries a charset= parameter
 * meta_http_equiv  value of the http-equiv attribute (may be NULL)
 * meta_content     value of the content attribute (may be NULL)
 *
 * Returns nothing; every early return leaves charset untouched.
 *
 * NOTE(review): both strcpy calls into charset are unbounded, and the size
 * of the buffer supplied by the caller is not known inside this function.
 */
52 void extract_charset_from_meta(char *charset, char *meta_http_equiv, char *meta_content)
/* Nothing to do unless both attribute values were found */
58 if (!meta_http_equiv) return;
59 if (!meta_content) return;
/* strcasecmp is non-zero on mismatch: only a Content-type tag is of interest */
62 if (strcasecmp(meta_http_equiv, "Content-type")) return;
/* The parameter list, if any, starts after the first semicolon */
64 ptr = strchr(meta_content, ';');
/*
 * ++ptr steps over the semicolon itself.
 * NOTE(review): this needs ptr to be non-NULL - confirm a NULL check sits
 * between the strchr above and this copy.
 */
67 safestrncpy(buf, ++ptr, sizeof buf);
/* Accept the parameter only when it is spelled charset= (case-insensitive) */
69 if (!strncasecmp(buf, "charset=", 8)) {
/* NOTE(review): unbounded copy; buf is bounded but charset may be smaller */
70 strcpy(charset, &buf[8]);
73 * The brain-damaged webmail program in Microsoft Exchange declares
74 * a charset of "unicode" when they really mean "UTF-8". GNU iconv
75 * treats "unicode" as an alias for "UTF-16" so we have to manually
76 * fix this here, otherwise messages generated in Exchange webmail
77 * show up as a big pile of weird characters.
79 if (!strcasecmp(charset, "unicode")) {
80 strcpy(charset, "UTF-8");
83 /* Remove wandering punctuation */
/* Truncate the name at the first double quote left over from the attribute */
84 if ((ptr=strchr(charset, '\"'))) *ptr = 0;
92 * Sanitize and enhance an HTML message for display.
93 * Also convert weird character sets to UTF-8 if necessary.
94 * Also fixup img src="cid:..." type inline images to fetch the image
/*
 * output_html() - sanitize an HTML message body and append it to Target.
 *
 * supplied_charset  charset declared for the message; copied into a local
 *                   buffer that a META tag inside the message may override
 * treat_as_wiki     when non-zero, anchors that do not already begin with
 *                   the wiki? prefix are rewritten into wiki?page= links
 * msgnum            message number used to build /webcit/mimepart/ URLs
 *                   for cid: references
 * Source            message text; when NULL the text is read line by line
 *                   with serv_getln() until a line equal to 000
 * Target            buffer that receives the finished markup
 *
 * Outline of the visible code:
 *  1. Load the message (through serv_getln, or straight from Source).
 *  2. First pass over the tags: take a charset from a META http-equiv
 *     tag, note HTML/HEAD/BODY tags as the point before which text is
 *     dropped, keep the BODY tag attributes in BodyArea (rewriting cid:
 *     references to /webcit/mimepart/ URLs), and react to /HTML or /BODY.
 *  3. Convert to UTF-8 through iconv unless the charset is us-ascii,
 *     UTF-8 or empty.
 *  4. Second pass: track script elements and cut their output, point
 *     mailto: anchors at the mail room, open external anchors in the
 *     TARGET window, rewrite img cid: sources, and turn bare http:// text
 *     into anchors (no other scheme is auto-linked in the visible code).
 *  5. When BodyArea was captured, wrap the result in a table carrying
 *     those attributes; append everything to Target and free the buffers.
 *     msg is freed only when it was allocated here (Source == NULL).
 *
 * NOTE(review) - each item is visible in the statements below:
 *  - Two strcpy calls shift text left inside a single buffer (one on
 *    meta_content, one on msg). Source and destination overlap, which the
 *    C standard leaves undefined; memmove is the defined alternative.
 *  - sprintf into new_window is unbounded, and new_window is later handed
 *    to StrBufAppendPrintf as the format string itself. Both are safe only
 *    while TARGET is short and holds no percent sign.
 *  - The malloc result for meta and the strdup result for meta_http_equiv
 *    are used by the very next statement without a NULL check.
 *  - The closing-anchor test compares only 3 characters, so any tag that
 *    starts with the same three characters also decrements alevel.
 *  - One debug lprintf prints a pointer difference with %d.
 *  - The Source path casts away the qualifier on ChrPtr(Source), yet msg
 *    appears to be written through later (strcpy onto msg, store to
 *    ptr[len]) - confirm those writes cannot reach the Source buffer.
 *  - The wiki test uses strncasecmp without negation on purpose: non-zero
 *    means the anchor is not yet a wiki? link.
 */
97 void output_html(const char *supplied_charset, int treat_as_wiki, int msgnum, StrBuf *Source, StrBuf *Target) {
103 StrBuf *converted_msg;
104 int buffer_length = 1;
106 int content_length = 0;
107 char new_window[SIZ];
111 int script_start_pos = (-1);
115 StrBuf *BodyArea = NULL;
117 iconv_t ic = (iconv_t)(-1) ;
118 char *ibuf; /* Buffer of characters to be converted */
119 char *obuf; /* Buffer for converted characters */
120 size_t ibuflen; /* Length of input buffer */
121 size_t obuflen; /* Length of output buffer */
122 char *osav; /* Saved pointer to output buffer */
/* Start from the declared charset; the META scan further down may replace it */
127 safestrncpy(charset, supplied_charset, sizeof charset);
/* Anchor prefix reused for every link that should open in the TARGET window */
129 sprintf(new_window, "<a target=\"%s\" href=", TARGET);
/* Source == NULL: accumulate lines from serv_getln until the 000 line */
131 if (Source == NULL) while (serv_getln(buf, sizeof buf), strcmp(buf, "000")) {
132 line_length = strlen(buf);
133 buffer_length = content_length + line_length + 2;
/* The realloc result goes to ptr first, so msg stays valid if it fails */
134 ptr = realloc(msg, buffer_length);
136 StrBufAppendPrintf(Target, "<b>");
137 StrBufAppendPrintf(Target, _("realloc() error! couldn't get %d bytes: %s"),
140 StrBufAppendPrintf(Target, "</b><br /><br />\n");
/* After a failure, keep reading up to the 000 line (presumably discarding it) */
141 while (serv_getln(buf, sizeof buf), strcmp(buf, "000")) {
/* Append the line, then a newline, at the running end of msg */
148 strcpy(&msg[content_length], buf);
149 content_length += line_length;
150 strcpy(&msg[content_length], "\n");
/* Presumably the Source != NULL branch: work on its buffer in place */
154 content_length = StrLength(Source);
156 msg = (char*) ChrPtr(Source);/* TODO: remove cast */
157 buffer_length = content_length;
160 /** Do a first pass to isolate the message body */
/* msgend is one past the last message byte; the scans stop there */
163 msgend = &msg[content_length];
165 while (ptr < msgend) {
167 /** Advance to next tag */
/* strchr depends on msg being NUL terminated */
168 ptr = strchr(ptr, '<');
169 if ((ptr == NULL) || (ptr >= msgend)) break;
171 if ((ptr == NULL) || (ptr >= msgend)) break;
174 * Look for META tags. Some messages (particularly in
175 * Asian locales) illegally declare a message's character
176 * set in the HTML instead of in the MIME headers. This
177 * is wrong but we have to work around it anyway.
179 if (!strncasecmp(ptr, "META", 4)) {
185 char *meta_http_equiv;
189 meta_start = &ptr[4];
190 meta_end = strchr(ptr, '>');
191 if ((meta_end != NULL) && (meta_end <= msgend)) {
192 meta_length = meta_end - meta_start + 1;
193 meta = malloc(meta_length + 1);
194 safestrncpy(meta, meta_start, meta_length);
195 meta[meta_length] = 0;
197 if (!strncasecmp(meta, "HTTP-EQUIV=", 11)) {
198 meta_http_equiv = strdup(&meta[11]);
199 spaceptr = strchr(meta_http_equiv, ' ');
200 if (spaceptr != NULL) {
202 meta_content = strdup(++spaceptr);
203 if (!strncasecmp(meta_content, "content=", 8)) {
204 strcpy(meta_content, &meta_content[8]);
205 stripquotes(meta_http_equiv);
206 stripquotes(meta_content);
207 extract_charset_from_meta(charset,
208 meta_http_equiv, meta_content);
212 free(meta_http_equiv);
219 * Any of these tags cause everything up to and including
220 * the tag to be removed.
222 if ( (!strncasecmp(ptr, "HTML", 4))
223 ||(!strncasecmp(ptr, "HEAD", 4))
224 ||(!strncasecmp(ptr, "/HEAD", 5))
225 ||(!strncasecmp(ptr, "BODY", 4)) ) {
228 if (!strncasecmp(ptr, "BODY", 4)) {
231 ptr = strchr(ptr, '>');
232 if ((ptr == NULL) || (ptr >= msgend)) break;
233 if ((pBody != NULL) && (ptr - pBody > 4)) {
235 char *cid_start, *cid_end;
239 while ((isspace(*pBody)) && (pBody < ptr))
241 BodyArea = NewStrBufPlain(NULL, ptr - pBody);
244 src = strstr(pBody, "cid:");
248 while ((*cid_end != '"') &&
249 !isspace(*cid_end) &&
253 /* copy tag and attributes up to src="cid: */
254 StrBufAppendBufPlain(BodyArea, pBody, src - pBody, 0);
256 /* add in /webcit/mimepart/<msgno>/CID/
257 trailing / stops dumb URL filters getting excited */
258 StrBufAppendPrintf(BodyArea,
259 "/webcit/mimepart/%d/",msgnum);
260 StrBufAppendBufPlain(BodyArea, cid_start, cid_end - cid_start, 0);
262 if (ptr - cid_end > 0)
263 StrBufAppendBufPlain(BodyArea,
268 StrBufAppendBufPlain(BodyArea, pBody, ptr - pBody, 0);
273 if ((ptr == NULL) || (ptr >= msgend)) break;
278 * Any of these tags cause everything including and following
279 * the tag to be removed.
281 if ( (!strncasecmp(ptr, "/HTML", 5))
282 ||(!strncasecmp(ptr, "/BODY", 5)) ) {
291 if (msgstart > msg) {
292 strcpy(msg, msgstart);
295 /* Now go through the message, parsing tags as necessary. */
296 converted_msg = NewStrBufPlain(NULL, content_length + 8192);
299 /** Convert foreign character sets to UTF-8 if necessary. */
301 if ( (strcasecmp(charset, "us-ascii"))
302 && (strcasecmp(charset, "UTF-8"))
303 && (strcasecmp(charset, ""))
305 lprintf(9, "Converting %s to UTF-8\n", charset);
306 ctdl_iconv_open("UTF-8", charset, &ic);
307 if (ic == (iconv_t)(-1) ) {
308 lprintf(5, "%s:%d iconv_open() failed: %s\n",
309 __FILE__, __LINE__, strerror(errno));
312 if (Source == NULL) {
313 if (ic != (iconv_t)(-1) ) {
315 ibuflen = content_length;
316 obuflen = content_length + (content_length / 2) ;
317 obuf = (char *) malloc(obuflen);
319 iconv(ic, &ibuf, &ibuflen, &obuf, &obuflen);
320 content_length = content_length + (content_length / 2) - obuflen;
321 osav[content_length] = 0;
328 if (ic != (iconv_t)(-1) ) {
329 StrBuf *Buf = NewStrBufPlain(NULL, StrLength(Source) + 8096);;
330 StrBufConvert(Source, Buf, &ic);
333 msg = (char*)ChrPtr(Source); /* TODO: get rid of this. */
340 * At this point, the message has been stripped down to
341 * only the content inside the <BODY></BODY> tags, and has
342 * been converted to UTF-8 if it was originally in a foreign
343 * character set. The text is also guaranteed to be null
347 if (converted_msg == NULL) {
348 StrBufAppendPrintf(Target, "Error %d: %s<br />%s:%d", errno, strerror(errno), __FILE__, __LINE__);
352 if (BodyArea != NULL) {
353 StrBufAppendBufPlain(converted_msg, HKEY("<table "), 0);
354 StrBufAppendBuf(converted_msg, BodyArea, 0);
355 StrBufAppendBufPlain(converted_msg, HKEY(" width=\"100%\"><tr><td>"), 0);
358 msgend = strchr(msg, 0);
359 while (ptr < msgend) {
361 /** Try to sanitize the html of any rogue scripts */
362 if (!strncasecmp(ptr, "<script", 7)) {
363 if (scriptlevel == 0) {
364 script_start_pos = StrLength(converted_msg);
368 if (!strncasecmp(ptr, "</script", 8)) {
373 * Change mailto: links to WebCit mail, by replacing the
374 * link with one that points back to our mail room. Due to
375 * the way we parse URL's, it'll even handle mailto: links
376 * that have "?subject=" in them.
378 if (!strncasecmp(ptr, "<a href=\"mailto:", 16)) {
379 content_length += 64;
380 StrBufAppendPrintf(converted_msg,
381 "<a href=\"display_enter?force_room=_MAIL_?recp=");
386 /** Make external links open in a separate window */
387 else if (!strncasecmp(ptr, "<a href=\"", 9)) {
390 if ( ((strchr(ptr, ':') < strchr(ptr, '/')))
391 && ((strchr(ptr, '/') < strchr(ptr, '>')))
393 /* open external links to new window */
394 StrBufAppendPrintf(converted_msg, new_window);
397 else if ( (treat_as_wiki) && (strncasecmp(ptr, "<a href=\"wiki?", 14)) ) {
398 content_length += 64;
399 StrBufAppendPrintf(converted_msg, "<a href=\"wiki?page=");
403 StrBufAppendPrintf(converted_msg, "<a href=\"");
407 /** Fixup <img src="cid:... ...> to fetch the mime part */
408 else if (!strncasecmp(ptr, "<img ", 5)) {
409 char *cid_start, *cid_end;
410 char* tag_end=strchr(ptr,'>');
412 /* FIXME - handle this situation (maybe someone opened an <img cid...
413 * and then ended the message)
416 lprintf(9, "tag_end is null and ptr is:\n");
417 lprintf(9, "%s\n", ptr);
418 lprintf(9, "Theoretical bytes remaining: %d\n", msgend - ptr);
421 src=strstr(ptr, "src=\"cid:");
427 && (cid_start=strchr(src,':'))
428 && (cid_end=strchr(cid_start,'"'))
429 && (cid_end < tag_end)
431 /* copy tag and attributes up to src="cid: */
432 StrBufAppendBufPlain(converted_msg, ptr, src - ptr, 0);
435 /* add in /webcit/mimepart/<msgno>/CID/
436 trailing / stops dumb URL filters getting excited */
437 StrBufAppendPrintf(converted_msg,
438 " src=\"/webcit/mimepart/%d/",msgnum);
439 StrBufAppendBufPlain(converted_msg, cid_start, cid_end - cid_start, 0);
440 StrBufAppendBufPlain(converted_msg, "/\"", -1, 0);
444 StrBufAppendBufPlain(converted_msg, ptr, tag_end - ptr, 0);
449 * Turn anything that looks like a URL into a real link, as long
450 * as it's not inside a tag already
452 else if ( (brak == 0) && (alevel == 0)
453 && (!strncasecmp(ptr, "http://", 7))) {
454 /** Find the end of the link */
458 strlenptr = strlen(ptr);
459 for (i=0; i<=strlenptr; ++i) {
473 /* did s.b. send us an entity? */
475 if ((ptr[i+2] ==';') ||
482 if (linklen > 0) break;
491 linkedchar = ptr[len];
493 /* spot for some subject strings tinymce tends to give us. */
494 ltreviewptr = strchr(ptr, '<');
495 if (ltreviewptr != NULL) {
497 linklen = ltreviewptr - ptr;
500 nbspreviewptr = strstr(ptr, " ");
501 if (nbspreviewptr != NULL) {
502 /* nbspreviewptr = '\0'; */
503 linklen = nbspreviewptr - ptr;
505 if (ltreviewptr != 0)
508 ptr[len] = linkedchar;
510 content_length += (32 + linklen);
511 StrBufAppendPrintf(converted_msg, "%s\"", new_window);
512 StrBufAppendBufPlain(converted_msg, ptr, linklen, 0);
513 StrBufAppendPrintf(converted_msg, "\">");
514 StrBufAppendBufPlain(converted_msg, ptr, linklen, 0);
516 StrBufAppendPrintf(converted_msg, "</A>");
520 StrBufAppendBufPlain(converted_msg, ptr, 1, 0);
525 if ((ptr >= msg) && (ptr <= msgend)) {
527 * We need to know when we're inside a tag,
528 * so we don't turn things that look like URL's into
529 * links, when they're already links - or image sources.
531 if ((ptr > msg) && (*(ptr-1) == '<')) {
534 if ((ptr > msg) && (*(ptr-1) == '>')) {
536 if ((scriptlevel == 0) && (script_start_pos >= 0)) {
537 StrBufCutRight(converted_msg, StrLength(converted_msg) - script_start_pos);
538 script_start_pos = (-1);
541 if (!strncasecmp(ptr, "</A>", 3)) --alevel;
545 if (BodyArea != NULL) {
546 StrBufAppendBufPlain(converted_msg, HKEY("</td></tr></table>"), 0);
547 FreeStrBuf(&BodyArea);
550 /** uncomment these two lines to override conversion */
551 /** memcpy(converted_msg, msg, content_length); */
552 /** output_length = content_length; */
554 /** Output our big pile of markup */
555 StrBufAppendBuf(Target, converted_msg, 0);
557 BAIL: /** A little trailing vertical whitespace... */
558 StrBufAppendPrintf(Target, "<br /><br />\n");
560 /** Now give back the memory */
561 FreeStrBuf(&converted_msg);
562 if ((msg != NULL) && (Source == NULL)) free(msg);
571 * Look for URL's embedded in a buffer and make them linkable. We use a
572 * target window in order to keep the Citadel session in its own window.
/*
 * UrlizeText() - copy Source to Target, wrapping the first http:// or
 * ftp:// URL found in an anchor that opens in the TARGET window.
 *
 * Target  buffer the result is appended to
 * Source  text to scan; read only
 * WrkBuf  scratch buffer that receives a copy of the URL text
 *         (presumably emptied before use - TODO confirm, not visible here)
 *
 * The forward scan stops at the first scheme match, so at most one URL per
 * call is turned into a link. When the text holds no URL, Source is
 * appended to Target as is.
 *
 * NOTE(review): the URL text is placed into the markup through %s with no
 * escaping visible in this function.
 * NOTE(review): isprint receives a plain char; the ctype functions expect a
 * value representable as unsigned char.
 */
574 void UrlizeText(StrBuf* Target, StrBuf *Source, StrBuf *WrkBuf)
576 int len, UrlLen, Offset, TrailerLen;
577 const char *start, *end, *pos;
582 len = StrLength(Source);
/* end starts out at the terminating position of Source */
583 end = ChrPtr(Source) + len;
/* Find the first URL; the start == NULL test ends the scan on the first hit */
584 for (pos = ChrPtr(Source); (pos < end) && (start == NULL); ++pos) {
585 if (!strncasecmp(pos, "http://", 7))
587 else if (!strncasecmp(pos, "ftp://", 6))
/* Presumably the no-URL path: pass the text through unchanged */
592 StrBufAppendBuf(Target, Source, 0);
/* Walk back from the end of the text towards the URL start to find where the URL stops */
597 for (pos = ChrPtr(Source) + len; pos > start; --pos) {
598 if ( (!isprint(*pos))
/* Stash the URL text so it can be printed twice (href and link text) */
617 UrlLen = end - start;
618 StrBufAppendBufPlain(WrkBuf, start, UrlLen, 0);
/* Offset = number of bytes in front of the URL; they are copied verbatim */
620 Offset = start - ChrPtr(Source);
622 StrBufAppendBufPlain(Target, ChrPtr(Source), Offset, 0);
/* LB, QU and RB are defined elsewhere; they supply the bracket and quote characters */
623 StrBufAppendPrintf(Target, "%ca href=%c%s%c TARGET=%c%s%c%c%s%c/A%c",
624 LB, QU, ChrPtr(WrkBuf), QU, QU, TARGET,
625 QU, RB, ChrPtr(WrkBuf), LB, RB);
/* Whatever follows the URL: total length minus the offset of end */
627 TrailerLen = StrLength(Source) - (end - ChrPtr(Source));
629 StrBufAppendBufPlain(Target, end, TrailerLen, 0);
/*
 * url() - in-place variant of the URL linker: rewrites buf so that the
 * first http:// or ftp:// URL is wrapped in an anchor that opens in the
 * TARGET window.
 *
 * buf      NUL terminated text, rewritten in place on success
 * bufsize  capacity of buf in bytes
 *
 * Every size problem is reported through lprintf with the same message;
 * presumably each report is followed by a return that leaves buf
 * untouched (TODO confirm - the returns are not visible here).
 *
 * NOTE(review), all visible in the statements below:
 *  - The urlbuf size check admits UrlLen equal to sizeof(urlbuf); the
 *    terminator store that follows would then land one past the end
 *    (assuming urlbuf is an array, as the sizeof use suggests).
 *  - When Offset is not smaller than sizeof(outbuf) the prefix memcpy is
 *    skipped, but the snprintf still targets outbuf at Offset and the
 *    size expression sizeof(outbuf) - Offset wraps around.
 *  - TrailerLen subtracts (end - start); if len is the length of buf, the
 *    bytes after end number len - (end - buf), so the value used here is
 *    too large by Offset whenever the URL is not at the start of buf.
 *  - The trailer is copied into outbuf before any size check, and the
 *    check that follows compares against bufsize, not sizeof(outbuf).
 *  - That check admits a total equal to bufsize, after which the
 *    terminator is stored at buf[bufsize].
 *  - Several comparisons mix int values with size_t.
 */
633 void url(char *buf, size_t bufsize)
635 int len, UrlLen, Offset, TrailerLen, outpos;
636 char *start, *end, *pos;
643 lprintf(1, "URL: content longer than buffer!");
/* Find the first URL; the start == NULL test ends the scan on the first hit */
647 for (pos = buf; (pos < end) && (start == NULL); ++pos) {
648 if (!strncasecmp(pos, "http://", 7))
650 if (!strncasecmp(pos, "ftp://", 6))
/* Walk back from the end of the text towards the URL start to find where the URL stops */
657 for (pos = buf+len; pos > start; --pos) {
658 if ( (!isprint(*pos))
677 UrlLen = end - start;
/* NOTE(review): should reject UrlLen == sizeof(urlbuf) as well */
678 if (UrlLen > sizeof(urlbuf)){
679 lprintf(1, "URL: content longer than buffer!");
/* Private NUL terminated copy of the URL, printed twice further down */
682 memcpy(urlbuf, start, UrlLen);
683 urlbuf[UrlLen] = '\0';
/* Offset = number of bytes in front of the URL */
685 Offset = start - buf;
686 if ((Offset != 0) && (Offset < sizeof(outbuf)))
687 memcpy(outbuf, buf, Offset);
/* Build the anchor right behind the copied prefix */
688 outpos = snprintf(&outbuf[Offset], sizeof(outbuf) - Offset,
689 "%ca href=%c%s%c TARGET=%c%s%c%c%s%c/A%c",
690 LB, QU, urlbuf, QU, QU, TARGET, QU, RB, urlbuf, LB, RB);
/* snprintf returns the untruncated length, so >= capacity means truncation */
691 if (outpos >= sizeof(outbuf) - Offset) {
692 lprintf(1, "URL: content longer than buffer!");
/* NOTE(review): measured from start rather than buf - see header */
696 TrailerLen = len - (end - start);
698 memcpy(outbuf + Offset + outpos, end, TrailerLen);
699 if (Offset + outpos + TrailerLen > bufsize) {
700 lprintf(1, "URL: content longer than buffer!");
/* Copy the assembled text back over the input and terminate it */
703 memcpy (buf, outbuf, Offset + outpos + TrailerLen);
704 *(buf + Offset + outpos + TrailerLen) = '\0';