striplt(buf);
if (!strncasecmp(buf, "charset=", 8)) {
strcpy(charset, &buf[8]);
+
+ /*
+ * The brain-damaged webmail program in Microsoft Exchange declares
+ * a charset of "unicode" when they really mean "UTF-8". GNU iconv
+ * treats "unicode" as an alias for "UTF-16" so we have to manually
+ * fix this here, otherwise messages generated in Exchange webmail
+ * show up as a big pile of weird characters.
+ */
+ if (!strcasecmp(charset, "unicode")) {
+ strcpy(charset, "UTF-8");
+ }
+
}
}
char *msgstart;
char *msgend;
char *converted_msg;
+ size_t converted_alloc = 0;
int buffer_length = 1;
int line_length = 0;
int content_length = 0;
char new_window[SIZ];
int brak = 0;
int alevel = 0;
+ int scriptlevel = 0;
+ int script_start_pos = (-1);
int i;
int linklen;
char charset[128];
*/
/** Now go through the message, parsing tags as necessary. */
- converted_msg = malloc(content_length);
+ converted_alloc = content_length + 8192;
+ converted_msg = malloc(converted_alloc);
+ if (converted_msg == NULL) {
+ wprintf("Error %d: %s<br />%s:%s", errno, strerror(errno), __FILE__, __LINE__);
+ goto BAIL;
+ }
+
strcpy(converted_msg, "");
ptr = msg;
msgend = strchr(msg, 0);
while (ptr < msgend) {
+ /** Try to sanitize the html of any rogue scripts */
+ if (!strncasecmp(ptr, "<script", 7)) {
+ if (scriptlevel == 0) {
+ script_start_pos = output_length;
+ }
+ ++scriptlevel;
+ }
+ if (!strncasecmp(ptr, "</script", 8)) {
+ --scriptlevel;
+ }
+
/**
* Change mailto: links to WebCit mail, by replacing the
* link with one that points back to our mail room. Due to
*/
if (!strncasecmp(ptr, "<a href=\"mailto:", 16)) {
content_length += 64;
- converted_msg = realloc(converted_msg, content_length);
+ if (content_length >= converted_alloc) {
+ converted_alloc += 8192;
+ converted_msg = realloc(converted_msg, converted_alloc);
+ if (converted_msg == NULL) {
+ abort();
+ }
+ }
sprintf(&converted_msg[output_length],
- "<a href=\"display_enter"
- "?force_room=_MAIL_&recp=");
- output_length += 47;
+ "<a href=\"display_enter?force_room=_MAIL_&recp=");
+ output_length += 46;
ptr = &ptr[16];
++alevel;
+ ++brak;
}
/** Make external links open in a separate window */
- else if (!strncasecmp(ptr, "<a href=", 8)) {
+ else if (!strncasecmp(ptr, "<a href=\"", 9)) {
++alevel;
+ ++brak;
if ( ((strchr(ptr, ':') < strchr(ptr, '/')))
&& ((strchr(ptr, '/') < strchr(ptr, '>')))
) {
/* open external links to new window */
content_length += 64;
- converted_msg = realloc(converted_msg, content_length);
+ if (content_length >= converted_alloc) {
+ converted_alloc += 8192;
+ converted_msg = realloc(converted_msg, converted_alloc);
+ if (converted_msg == NULL) {
+ abort();
+ }
+ }
sprintf(&converted_msg[output_length], new_window);
output_length += strlen(new_window);
ptr = &ptr[8];
}
else if ( (treat_as_wiki) && (strncasecmp(ptr, "<a href=\"wiki?", 14)) ) {
content_length += 64;
- converted_msg = realloc(converted_msg, content_length);
+ if (content_length >= converted_alloc) {
+ converted_alloc += 8192;
+ converted_msg = realloc(converted_msg, converted_alloc);
+ if (converted_msg == NULL) {
+ abort();
+ }
+ }
sprintf(&converted_msg[output_length], "<a href=\"wiki?page=");
output_length += 19;
ptr = &ptr[9];
}
else {
- sprintf(&converted_msg[output_length], "<a href=");
- output_length += 8;
- ptr = &ptr[8];
+ sprintf(&converted_msg[output_length], "<a href=\"");
+ output_length += 9;
+ ptr = &ptr[9];
}
}
+
/**
* Turn anything that looks like a URL into a real link, as long
* as it's not inside a tag already
*/
else if ( (brak == 0) && (alevel == 0)
&& (!strncasecmp(ptr, "http://", 7))) {
- linklen = 0;
/** Find the end of the link */
- for (i=0; i<=strlen(ptr); ++i) {
+ int strlenptr;
+ linklen = 0;
+ strlenptr = strlen(ptr);
+ for (i=0; i<=strlenptr; ++i) {
if ((ptr[i]==0)
||(isspace(ptr[i]))
||(ptr[i]==10)
if (linklen > 0) break;
}
if (linklen > 0) {
+ char *ltreviewptr;
+ char *nbspreviewptr;
+ //* spot for some subject strings tinymce tends to give us.
+ ltreviewptr = strchr(ptr, '<');
+ if (ltreviewptr != NULL) {
+ *ltreviewptr = '\0';
+ linklen = ltreviewptr - ptr;
+ }
+
+ nbspreviewptr = strstr(ptr, " ");
+ if (nbspreviewptr != NULL) {
+ ///*nbspreviewptr = '\0';
+ linklen = nbspreviewptr - ptr;
+ }
+ if (ltreviewptr != 0)
+ *ltreviewptr = '<';
+
content_length += (32 + linklen);
- converted_msg = realloc(converted_msg, content_length);
+ if (content_length >= converted_alloc) {
+ converted_alloc += 8192;
+ converted_msg = realloc(converted_msg, converted_alloc);
+ if (converted_msg == NULL) {
+ abort();
+ }
+ }
sprintf(&converted_msg[output_length], new_window);
output_length += strlen(new_window);
converted_msg[output_length] = '\"';
}
}
else {
- /**
- * We need to know when we're inside a tag,
- * so we don't turn things that look like URL's into
- * links, when they're already links - or image sources.
- */
- if (*ptr == '<') ++brak;
- if (*ptr == '>') --brak;
- if (!strncasecmp(ptr, "</A>", 3)) --alevel;
converted_msg[output_length] = *ptr++;
converted_msg[++output_length] = 0;
}
+
+ /**
+ * We need to know when we're inside a tag,
+ * so we don't turn things that look like URL's into
+ * links, when they're already links - or image sources.
+ */
+ if (*(ptr-1) == '<') {
+ ++brak;
+ }
+ if (*(ptr-1) == '>') {
+ --brak;
+ if ((scriptlevel == 0) && (script_start_pos >= 0)) {
+ output_length = script_start_pos;
+ converted_msg[output_length] = 0;
+ script_start_pos = (-1);
+ }
+ }
+ if (!strncasecmp(ptr, "</A>", 3)) --alevel;
}
/** uncomment these two lines to override conversion */
/** Output our big pile of markup */
client_write(converted_msg, output_length);
- /** A little trailing vertical whitespace... */
+BAIL: /** A little trailing vertical whitespace... */
wprintf("<br /><br />\n");
/** Now give back the memory */
- free(converted_msg);
- free(msg);
+ if (converted_msg != NULL) free(converted_msg);
+ if (msg != NULL) free(msg);
}
/*@}*/