code.citadel.org Git - citadel.git/blobdiff - citadel/html.c
* msgbase.c: when a summary mode message list is requested, and the room
[citadel.git] / citadel / html.c
index 847bfa0404ca8e71b8d2b2b55663bfc0df02a942..8ff0bbc936d058a80093c20a7f15565a370da0b4 100644 (file)
@@ -2,14 +2,10 @@
  * $Id$
  *
  * Functions which handle translation between HTML and plain text
- * Copyright (c) 2000-2001 by Art Cancro and others.   This program is
+ * Copyright (c) 2000-2005 by Art Cancro and others.   This program is
  * released under the terms of the GNU General Public License.
  */
 
-#ifdef DLL_EXPORT
-#define IN_LIBCIT
-#endif
-
 #include "sysdep.h"
 #include <stdlib.h>
 #include <unistd.h>
@@ -52,7 +48,7 @@
  * screenwidth   = desired output screenwidth
  * do_citaformat = set to 1 to indent newlines with spaces
  */
-char *html_to_ascii(char *inputmsg, int screenwidth, int do_citaformat) {
+char *html_to_ascii(char *inputmsg, int msglen, int screenwidth, int do_citaformat) {
        char inbuf[SIZ];
        char outbuf[SIZ];
        char tag[1024];
@@ -64,10 +60,12 @@ char *html_to_ascii(char *inputmsg, int screenwidth, int do_citaformat) {
        int i, j, ch, did_out, rb, scanch;
        int nest = 0;           /* Bracket nesting level */
        int blockquote = 0;     /* BLOCKQUOTE nesting level */
+       int bytes_processed = 0;
 
        inptr = inputmsg;
        strcpy(inbuf, "");
        strcpy(outbuf, "");
+       if (msglen == 0) msglen = strlen(inputmsg);
 
        outptr_buffer_size = strlen(inptr) + SIZ;
        outptr = malloc(outptr_buffer_size);
@@ -88,6 +86,11 @@ char *html_to_ascii(char *inputmsg, int screenwidth, int do_citaformat) {
                                done_reading = 1;
                        }
 
+                       ++bytes_processed;
+                       if (bytes_processed > msglen) {
+                               done_reading = 1;
+                       }
+
                }
 
                /* Do some parsing */
@@ -220,11 +223,27 @@ char *html_to_ascii(char *inputmsg, int screenwidth, int do_citaformat) {
                /* Convert &; tags to the forbidden characters */
                if (strlen(outbuf)>0) for (i=0; i<strlen(outbuf); ++i) {
 
+                       /* Character entity references */
                        if (!strncasecmp(&outbuf[i], "&nbsp;", 6)) {
                                outbuf[i] = ' ';
                                strcpy(&outbuf[i+1], &outbuf[i+6]);
                        }
 
+                       if (!strncasecmp(&outbuf[i], "&ensp;", 6)) {
+                               outbuf[i] = ' ';
+                               strcpy(&outbuf[i+1], &outbuf[i+6]);
+                       }
+
+                       if (!strncasecmp(&outbuf[i], "&emsp;", 6)) {
+                               outbuf[i] = ' ';
+                               strcpy(&outbuf[i+1], &outbuf[i+6]);
+                       }
+
+                       if (!strncasecmp(&outbuf[i], "&thinsp;", 8)) {
+                               outbuf[i] = ' ';
+                               strcpy(&outbuf[i+1], &outbuf[i+8]);
+                       }
+
                        else if (!strncasecmp(&outbuf[i], "&lt;", 4)) {
                                outbuf[i] = '<';
                                strcpy(&outbuf[i+1], &outbuf[i+4]);
@@ -244,6 +263,16 @@ char *html_to_ascii(char *inputmsg, int screenwidth, int do_citaformat) {
                                strcpy(&outbuf[i+1], &outbuf[i+6]);
                        }
 
+                       else if (!strncasecmp(&outbuf[i], "&lsquo;", 7)) {
+                               outbuf[i] = '`';
+                               strcpy(&outbuf[i+1], &outbuf[i+7]);
+                       }
+
+                       else if (!strncasecmp(&outbuf[i], "&rsquo;", 7)) {
+                               outbuf[i] = '\'';
+                               strcpy(&outbuf[i+1], &outbuf[i+7]);
+                       }
+
                        else if (!strncasecmp(&outbuf[i], "&copy;", 6)) {
                                outbuf[i] = '(';
                                outbuf[i+1] = 'c';
@@ -251,6 +280,21 @@ char *html_to_ascii(char *inputmsg, int screenwidth, int do_citaformat) {
                                strcpy(&outbuf[i+3], &outbuf[i+6]);
                        }
 
+                       else if (!strncasecmp(&outbuf[i], "&hellip;", 8)) {
+                               outbuf[i] = '.';
+                               outbuf[i+1] = '.';
+                               outbuf[i+2] = '.';
+                               strcpy(&outbuf[i+3], &outbuf[i+8]);
+                       }
+
+                       else if (!strncasecmp(&outbuf[i], "&trade;", 7)) {
+                               outbuf[i] = '(';
+                               outbuf[i+1] = 't';
+                               outbuf[i+2] = 'm';
+                               outbuf[i+3] = ')';
+                               strcpy(&outbuf[i+4], &outbuf[i+7]);
+                       }
+
                        else if (!strncasecmp(&outbuf[i], "&reg;", 5)) {
                                outbuf[i] = '(';
                                outbuf[i+1] = 'r';
@@ -258,6 +302,40 @@ char *html_to_ascii(char *inputmsg, int screenwidth, int do_citaformat) {
                                strcpy(&outbuf[i+3], &outbuf[i+5]);
                        }
 
+                       else if (!strncasecmp(&outbuf[i], "&frac14;", 8)) {
+                               outbuf[i] = '1';
+                               outbuf[i+1] = '/';
+                               outbuf[i+2] = '4';
+                               strcpy(&outbuf[i+3], &outbuf[i+8]);
+                       }
+
+                       else if (!strncasecmp(&outbuf[i], "&frac12;", 8)) {
+                               outbuf[i] = '1';
+                               outbuf[i+1] = '/';
+                               outbuf[i+2] = '2';
+                               strcpy(&outbuf[i+3], &outbuf[i+8]);
+                       }
+
+                       else if (!strncasecmp(&outbuf[i], "&frac34;", 8)) {
+                               outbuf[i] = '3';
+                               outbuf[i+1] = '/';
+                               outbuf[i+2] = '4';
+                               strcpy(&outbuf[i+3], &outbuf[i+8]);
+                       }
+
+                       else if (!strncasecmp(&outbuf[i], "&ndash;", 7)) {
+                               outbuf[i] = '-';
+                               outbuf[i+1] = '-';
+                               strcpy(&outbuf[i+2], &outbuf[i+7]);
+                       }
+
+                       else if (!strncasecmp(&outbuf[i], "&mdash;", 7)) {
+                               outbuf[i] = '-';
+                               outbuf[i+1] = '-';
+                               outbuf[i+2] = '-';
+                               strcpy(&outbuf[i+3], &outbuf[i+7]);
+                       }
+
                        /* two-digit decimal equivalents */
                        else if ((!strncmp(&outbuf[i], "&#", 2))
                              && (outbuf[i+4] == ';') ) {