Added a couple more entity declarations
[citadel.git] / libcitadel / lib / html_to_ascii.c
index e7d7fc5dd96a772a717a4c3ec8531d1d1f7eec06..6e4600ab19169317949538f7295e0404a4807268 100644 (file)
@@ -1,9 +1,20 @@
 /*
- * $Id: html.c 6014 2008-02-04 18:38:35Z ajc $
- *
  * Functions which handle translation between HTML and plain text
- * Copyright (c) 2000-2005 by Art Cancro and others.   This program is
- * released under the terms of the GNU General Public License.
+ * Copyright (c) 2000-2010 by the citadel.org team
+ *
+ * This program is open source software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
 #include <stdlib.h>
  * screenwidth   = desired output screenwidth
  * do_citaformat = set to 1 to indent newlines with spaces
  */
-char *html_to_ascii(char *inputmsg, int msglen, int screenwidth, int do_citaformat) {
+char *html_to_ascii(const char *inputmsg, int msglen, int screenwidth, int do_citaformat) {
        char inbuf[SIZ];
        int inbuf_len = 0;
        char outbuf[SIZ];
        char tag[1024];
        int done_reading = 0;
-       char *inptr;
+       const char *inptr;
        char *outptr;
        size_t outptr_buffer_size;
        size_t output_len = 0;
@@ -98,14 +109,11 @@ char *html_to_ascii(char *inputmsg, int msglen, int screenwidth, int do_citaform
                        if (inbuf[i]==10) inbuf[i]=32;
                        if (inbuf[i]==13) inbuf[i]=32;
                        if (inbuf[i]==9) inbuf[i]=32;
-                       /*** we like foreign characters now.
-                       if ((inbuf[i]<32) || (inbuf[i]>126)) {
-                               inbuf[i] = '?';
-                       } */
                    }
                    for (i=0; !IsEmptyStr(&inbuf[i]); ++i) {
-                       while ((inbuf[i]==32)&&(inbuf[i+1]==32))
+                       while ((inbuf[i]==32)&&(inbuf[i+1]==32)) {
                                strcpy(&inbuf[i], &inbuf[i+1]);
+                       }
                    }
 
                    for (i=0; !IsEmptyStr(&inbuf[i]); ++i) {
@@ -189,6 +197,34 @@ char *html_to_ascii(char *inputmsg, int msglen, int screenwidth, int do_citaform
                                        strcat(outbuf, nl);
                                }
 
+                               else if (
+                                       (!strcasecmp(tag, "B"))
+                                       || (!strcasecmp(tag, "/B"))
+                                       || (!strcasecmp(tag, "STRONG"))
+                                       || (!strcasecmp(tag, "/STRONG"))
+                               ) {
+                                       strcat(outbuf, "*");
+                                       
+                               }
+
+                               else if (
+                                       (!strcasecmp(tag, "I"))
+                                       || (!strcasecmp(tag, "/I"))
+                                       || (!strcasecmp(tag, "EM"))
+                                       || (!strcasecmp(tag, "/EM"))
+                               ) {
+                                       strcat(outbuf, "/");
+                                       
+                               }
+
+                               else if (
+                                       (!strcasecmp(tag, "U"))
+                                       || (!strcasecmp(tag, "/U"))
+                               ) {
+                                       strcat(outbuf, "_");
+                                       
+                               }
+
                                else if (!strcasecmp(tag, "BR")) {
                                        strcat(outbuf, nl);
                                }
@@ -433,9 +469,23 @@ char *html_to_ascii(char *inputmsg, int msglen, int screenwidth, int do_citaform
                                strcpy(&outbuf[i+1], &outbuf[i+7]);
                        }
 
+                       else if (!strncasecmp(&outbuf[i], "&#8217;", 7)) {
+                               outbuf[i] = '\'';
+                               strcpy(&outbuf[i+1], &outbuf[i+7]);
+                       }
+
+                       else if (!strncasecmp(&outbuf[i], "&#8211;", 7)) {
+                               outbuf[i] = '-';
+                               strcpy(&outbuf[i+1], &outbuf[i+7]);
+                       }
+
                        /* two-digit decimal equivalents */
-                       else if ((!strncmp(&outbuf[i], "&#", 2))
-                             && (outbuf[i+4] == ';') ) {
+                       else if (outbuf[i] == '&'       &&
+                                outbuf[i + 1] == '#'   &&
+                                isdigit(outbuf[i + 2]) && 
+                                isdigit(outbuf[i + 3]) &&
+                                (outbuf[i+4] == ';') ) 
+                       {
                                scanch = 0;
                                sscanf(&outbuf[i+2], "%02d", &scanch);
                                outbuf[i] = scanch;
@@ -443,14 +493,34 @@ char *html_to_ascii(char *inputmsg, int msglen, int screenwidth, int do_citaform
                        }
 
                        /* three-digit decimal equivalents */
-                       else if ((!strncmp(&outbuf[i], "&#", 2))
-                             && (outbuf[i+5] == ';') ) {
+                       else if (outbuf[i] == '&'       &&
+                                outbuf[i + 1] == '#'   &&
+                                isdigit(outbuf[i + 2]) && 
+                                isdigit(outbuf[i + 3]) && 
+                                isdigit(outbuf[i + 4]) &&
+                                (outbuf[i + 5] == ';') ) 
+                       {
                                scanch = 0;
                                sscanf(&outbuf[i+2], "%03d", &scanch);
                                outbuf[i] = scanch;
                                strcpy(&outbuf[i+1], &outbuf[i+6]);
                        }
 
+                       /* four-digit decimal equivalents */
+                       else if (outbuf[i] == '&'       &&
+                                outbuf[i + 1] == '#'   &&
+                                isdigit(outbuf[i + 2]) && 
+                                isdigit(outbuf[i + 3]) && 
+                                isdigit(outbuf[i + 4]) &&
+                                isdigit(outbuf[i + 5]) &&
+                                (outbuf[i + 6] == ';') ) 
+                       {
+                               scanch = 0;
+                               sscanf(&outbuf[i+2], "%04d", &scanch);
+                               outbuf[i] = scanch;
+                               strcpy(&outbuf[i+1], &outbuf[i+7]);
+                       }
+
                }
 
                /* Make sure the output buffer is big enough */
@@ -532,7 +602,7 @@ char *html_to_ascii(char *inputmsg, int msglen, int screenwidth, int do_citaform
                --output_len;
        }
 
-       if (outptr[output_len-1] != '\n') {
+       if ((output_len > 0) && (outptr[output_len-1] != '\n')) {
                strcat(outptr, "\n");
                ++output_len;
        }