X-Git-Url: https://code.citadel.org/?a=blobdiff_plain;f=libcitadel%2Flib%2Fhtml_to_ascii.c;h=6e4600ab19169317949538f7295e0404a4807268;hb=de0eab6118786f81de402fbef2f6d6ec116994aa;hp=e7d7fc5dd96a772a717a4c3ec8531d1d1f7eec06;hpb=4bb58f061555ed9770803727c1408ede497deb32;p=citadel.git diff --git a/libcitadel/lib/html_to_ascii.c b/libcitadel/lib/html_to_ascii.c index e7d7fc5dd..6e4600ab1 100644 --- a/libcitadel/lib/html_to_ascii.c +++ b/libcitadel/lib/html_to_ascii.c @@ -1,9 +1,20 @@ /* - * $Id: html.c 6014 2008-02-04 18:38:35Z ajc $ - * * Functions which handle translation between HTML and plain text - * Copyright (c) 2000-2005 by Art Cancro and others. This program is - * released under the terms of the GNU General Public License. + * Copyright (c) 2000-2010 by the citadel.org team + * + * This program is open source software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include @@ -38,13 +49,13 @@ * screenwidth = desired output screenwidth * do_citaformat = set to 1 to indent newlines with spaces */ -char *html_to_ascii(char *inputmsg, int msglen, int screenwidth, int do_citaformat) { +char *html_to_ascii(const char *inputmsg, int msglen, int screenwidth, int do_citaformat) { char inbuf[SIZ]; int inbuf_len = 0; char outbuf[SIZ]; char tag[1024]; int done_reading = 0; - char *inptr; + const char *inptr; char *outptr; size_t outptr_buffer_size; size_t output_len = 0; @@ -98,14 +109,11 @@ char *html_to_ascii(char *inputmsg, int msglen, int screenwidth, int do_citaform if (inbuf[i]==10) inbuf[i]=32; if (inbuf[i]==13) inbuf[i]=32; if (inbuf[i]==9) inbuf[i]=32; - /*** we like foreign characters now. - if ((inbuf[i]<32) || (inbuf[i]>126)) { - inbuf[i] = '?'; - } */ } for (i=0; !IsEmptyStr(&inbuf[i]); ++i) { - while ((inbuf[i]==32)&&(inbuf[i+1]==32)) + while ((inbuf[i]==32)&&(inbuf[i+1]==32)) { strcpy(&inbuf[i], &inbuf[i+1]); + } } for (i=0; !IsEmptyStr(&inbuf[i]); ++i) { @@ -189,6 +197,34 @@ char *html_to_ascii(char *inputmsg, int msglen, int screenwidth, int do_citaform strcat(outbuf, nl); } + else if ( + (!strcasecmp(tag, "B")) + || (!strcasecmp(tag, "/B")) + || (!strcasecmp(tag, "STRONG")) + || (!strcasecmp(tag, "/STRONG")) + ) { + strcat(outbuf, "*"); + + } + + else if ( + (!strcasecmp(tag, "I")) + || (!strcasecmp(tag, "/I")) + || (!strcasecmp(tag, "EM")) + || (!strcasecmp(tag, "/EM")) + ) { + strcat(outbuf, "/"); + + } + + else if ( + (!strcasecmp(tag, "U")) + || (!strcasecmp(tag, "/U")) + ) { + strcat(outbuf, "_"); + + } + else if (!strcasecmp(tag, "BR")) { strcat(outbuf, nl); } @@ -433,9 +469,23 @@ char *html_to_ascii(char *inputmsg, int msglen, int screenwidth, int do_citaform strcpy(&outbuf[i+1], &outbuf[i+7]); } + 
else if (!strncasecmp(&outbuf[i], "&rsquo;", 7)) { + outbuf[i] = '\''; + strcpy(&outbuf[i+1], &outbuf[i+7]); + } + + else if (!strncasecmp(&outbuf[i], "&ndash;", 7)) { + outbuf[i] = '-'; + strcpy(&outbuf[i+1], &outbuf[i+7]); + } + /* two-digit decimal equivalents */ - else if ((!strncmp(&outbuf[i], "&#", 2)) - && (outbuf[i+4] == ';') ) { + else if (outbuf[i] == '&' && + outbuf[i + 1] == '#' && + isdigit(outbuf[i + 2]) && + isdigit(outbuf[i + 3]) && + (outbuf[i+4] == ';') ) + { scanch = 0; sscanf(&outbuf[i+2], "%02d", &scanch); outbuf[i] = scanch; @@ -443,14 +493,34 @@ char *html_to_ascii(char *inputmsg, int msglen, int screenwidth, int do_citaform } /* three-digit decimal equivalents */ - else if ((!strncmp(&outbuf[i], "&#", 2)) - && (outbuf[i+5] == ';') ) { + else if (outbuf[i] == '&' && + outbuf[i + 1] == '#' && + isdigit(outbuf[i + 2]) && + isdigit(outbuf[i + 3]) && + isdigit(outbuf[i + 4]) && + (outbuf[i + 5] == ';') ) + { scanch = 0; sscanf(&outbuf[i+2], "%03d", &scanch); outbuf[i] = scanch; strcpy(&outbuf[i+1], &outbuf[i+6]); } + /* four-digit decimal equivalents */ + else if (outbuf[i] == '&' && + outbuf[i + 1] == '#' && + isdigit(outbuf[i + 2]) && + isdigit(outbuf[i + 3]) && + isdigit(outbuf[i + 4]) && + isdigit(outbuf[i + 5]) && + (outbuf[i + 6] == ';') ) + { + scanch = 0; + sscanf(&outbuf[i+2], "%04d", &scanch); + outbuf[i] = scanch; + strcpy(&outbuf[i+1], &outbuf[i+7]); + } + } /* Make sure the output buffer is big enough */ @@ -532,7 +602,7 @@ char *html_to_ascii(char *inputmsg, int msglen, int screenwidth, int do_citaform --output_len; } - if (outptr[output_len-1] != '\n') { + if ((output_len > 0) && (outptr[output_len-1] != '\n')) { strcat(outptr, "\n"); ++output_len; }