Added handling for a couple more HTML entities (&#8217;, &#8211;, and four-digit decimal character references)
[citadel.git] / libcitadel / lib / html_to_ascii.c
index 90d1103f56e5dbfc629c4c2ca52268978d720b0a..6e4600ab19169317949538f7295e0404a4807268 100644 (file)
@@ -1,22 +1,20 @@
 /*
- * $Id: html.c 6014 2008-02-04 18:38:35Z ajc $
- *
  * Functions which handle translation between HTML and plain text
  * Copyright (c) 2000-2010 by the citadel.org team
  *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 3 of the License, or
- *  (at your option) any later version.
+ * This program is open source software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
  *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
 #include <stdlib.h>
  * screenwidth   = desired output screenwidth
  * do_citaformat = set to 1 to indent newlines with spaces
  */
-char *html_to_ascii(char *inputmsg, int msglen, int screenwidth, int do_citaformat) {
+char *html_to_ascii(const char *inputmsg, int msglen, int screenwidth, int do_citaformat) {
        char inbuf[SIZ];
        int inbuf_len = 0;
        char outbuf[SIZ];
        char tag[1024];
        int done_reading = 0;
-       char *inptr;
+       const char *inptr;
        char *outptr;
        size_t outptr_buffer_size;
        size_t output_len = 0;
@@ -471,9 +469,23 @@ char *html_to_ascii(char *inputmsg, int msglen, int screenwidth, int do_citaform
                                strcpy(&outbuf[i+1], &outbuf[i+7]);
                        }
 
+                       else if (!strncasecmp(&outbuf[i], "&#8217;", 7)) {
+                               outbuf[i] = '\'';
+                               strcpy(&outbuf[i+1], &outbuf[i+7]);
+                       }
+
+                       else if (!strncasecmp(&outbuf[i], "&#8211;", 7)) {
+                               outbuf[i] = '-';
+                               strcpy(&outbuf[i+1], &outbuf[i+7]);
+                       }
+
                        /* two-digit decimal equivalents */
-                       else if ((!strncmp(&outbuf[i], "&#", 2))
-                             && (outbuf[i+4] == ';') ) {
+                       else if (outbuf[i] == '&'       &&
+                                outbuf[i + 1] == '#'   &&
+                                isdigit(outbuf[i + 2]) && 
+                                isdigit(outbuf[i + 3]) &&
+                                (outbuf[i+4] == ';') ) 
+                       {
                                scanch = 0;
                                sscanf(&outbuf[i+2], "%02d", &scanch);
                                outbuf[i] = scanch;
@@ -481,14 +493,34 @@ char *html_to_ascii(char *inputmsg, int msglen, int screenwidth, int do_citaform
                        }
 
                        /* three-digit decimal equivalents */
-                       else if ((!strncmp(&outbuf[i], "&#", 2))
-                             && (outbuf[i+5] == ';') ) {
+                       else if (outbuf[i] == '&'       &&
+                                outbuf[i + 1] == '#'   &&
+                                isdigit(outbuf[i + 2]) && 
+                                isdigit(outbuf[i + 3]) && 
+                                isdigit(outbuf[i + 4]) &&
+                                (outbuf[i + 5] == ';') ) 
+                       {
                                scanch = 0;
                                sscanf(&outbuf[i+2], "%03d", &scanch);
                                outbuf[i] = scanch;
                                strcpy(&outbuf[i+1], &outbuf[i+6]);
                        }
 
+                       /* four-digit decimal equivalents */
+                       else if (outbuf[i] == '&'       &&
+                                outbuf[i + 1] == '#'   &&
+                                isdigit(outbuf[i + 2]) && 
+                                isdigit(outbuf[i + 3]) && 
+                                isdigit(outbuf[i + 4]) &&
+                                isdigit(outbuf[i + 5]) &&
+                                (outbuf[i + 6] == ';') ) 
+                       {
+                               scanch = 0;
+                               sscanf(&outbuf[i+2], "%04d", &scanch);
+                               outbuf[i] = scanch;
+                               strcpy(&outbuf[i+1], &outbuf[i+7]);
+                       }
+
                }
 
                /* Make sure the output buffer is big enough */
@@ -570,7 +602,7 @@ char *html_to_ascii(char *inputmsg, int msglen, int screenwidth, int do_citaform
                --output_len;
        }
 
-       if (outptr[output_len-1] != '\n') {
+       if ((output_len > 0) && (outptr[output_len-1] != '\n')) {
                strcat(outptr, "\n");
                ++output_len;
        }