X-Git-Url: https://code.citadel.org/?a=blobdiff_plain;f=libcitadel%2Flib%2Fhtml_to_ascii.c;h=5ea1e2756623e57c362c6a12b3dcd331b906d36b;hb=94849fa2121eef82d797137193a2ee6eac56bc96;hp=e2cb2b77abe96b48963b25f224f8e0ce8abaf419;hpb=570f816b5024916402d429fbcff0f094779a943a;p=citadel.git diff --git a/libcitadel/lib/html_to_ascii.c b/libcitadel/lib/html_to_ascii.c index e2cb2b77a..5ea1e2756 100644 --- a/libcitadel/lib/html_to_ascii.c +++ b/libcitadel/lib/html_to_ascii.c @@ -1,9 +1,20 @@ /* - * $Id: html.c 6014 2008-02-04 18:38:35Z ajc $ - * * Functions which handle translation between HTML and plain text - * Copyright (c) 2000-2005 by Art Cancro and others. This program is - * released under the terms of the GNU General Public License. + * Copyright (c) 2000-2018 by the citadel.org team + * + * This program is open source software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include @@ -36,15 +47,14 @@ * * inputmsg = pointer to raw HTML message * screenwidth = desired output screenwidth - * do_citaformat = set to 1 to indent newlines with spaces */ -char *html_to_ascii(char *inputmsg, int msglen, int screenwidth, int do_citaformat) { +char *html_to_ascii(const char *inputmsg, int msglen, int screenwidth) { char inbuf[SIZ]; int inbuf_len = 0; char outbuf[SIZ]; char tag[1024]; int done_reading = 0; - char *inptr; + const char *inptr; char *outptr; size_t outptr_buffer_size; size_t output_len = 0; @@ -56,6 +66,7 @@ char *html_to_ascii(char *inputmsg, int msglen, int screenwidth, int do_citaform int bytes_processed = 0; char nl[128]; + tag[0] = '\0'; strcpy(nl, "\n"); inptr = inputmsg; strcpy(inbuf, ""); @@ -186,6 +197,35 @@ char *html_to_ascii(char *inputmsg, int msglen, int screenwidth, int do_citaform strcat(outbuf, nl); } +/**** + These seemed like a good idea at the time, but it just makes a mess. 
+ + else if ( + (!strcasecmp(tag, "B")) + || (!strcasecmp(tag, "/B")) + || (!strcasecmp(tag, "STRONG")) + || (!strcasecmp(tag, "/STRONG")) + ) { + strcat(outbuf, "*"); + } + + else if ( + (!strcasecmp(tag, "I")) + || (!strcasecmp(tag, "/I")) + || (!strcasecmp(tag, "EM")) + || (!strcasecmp(tag, "/EM")) + ) { + strcat(outbuf, "/"); + } + + else if ( + (!strcasecmp(tag, "U")) + || (!strcasecmp(tag, "/U")) + ) { + strcat(outbuf, "_"); + } +****/ + else if (!strcasecmp(tag, "BR")) { strcat(outbuf, nl); } @@ -430,9 +470,23 @@ char *html_to_ascii(char *inputmsg, int msglen, int screenwidth, int do_citaform strcpy(&outbuf[i+1], &outbuf[i+7]); } + else if (!strncasecmp(&outbuf[i], "&#8217;", 7)) { + outbuf[i] = '\''; + strcpy(&outbuf[i+1], &outbuf[i+7]); + } + + else if (!strncasecmp(&outbuf[i], "&#8211;", 7)) { + outbuf[i] = '-'; + strcpy(&outbuf[i+1], &outbuf[i+7]); + } + /* two-digit decimal equivalents */ - else if ((!strncmp(&outbuf[i], "&#", 2)) - && (outbuf[i+4] == ';') ) { + else if (outbuf[i] == '&' && + outbuf[i + 1] == '#' && + isdigit(outbuf[i + 2]) && + isdigit(outbuf[i + 3]) && + (outbuf[i+4] == ';') ) + { scanch = 0; sscanf(&outbuf[i+2], "%02d", &scanch); outbuf[i] = scanch; @@ -440,14 +494,34 @@ char *html_to_ascii(char *inputmsg, int msglen, int screenwidth, int do_citaform } /* three-digit decimal equivalents */ - else if ((!strncmp(&outbuf[i], "&#", 2)) - && (outbuf[i+5] == ';') ) { + else if (outbuf[i] == '&' && + outbuf[i + 1] == '#' && + isdigit(outbuf[i + 2]) && + isdigit(outbuf[i + 3]) && + isdigit(outbuf[i + 4]) && + (outbuf[i + 5] == ';') ) + { scanch = 0; sscanf(&outbuf[i+2], "%03d", &scanch); outbuf[i] = scanch; strcpy(&outbuf[i+1], &outbuf[i+6]); } + /* four-digit decimal equivalents */ + else if (outbuf[i] == '&' && + outbuf[i + 1] == '#' && + isdigit(outbuf[i + 2]) && + isdigit(outbuf[i + 3]) && + isdigit(outbuf[i + 4]) && + isdigit(outbuf[i + 5]) && + (outbuf[i + 6] == ';') ) + { + scanch = 0; + sscanf(&outbuf[i+2], "%04d", &scanch); + outbuf[i] = 
scanch; + strcpy(&outbuf[i+1], &outbuf[i+7]); + } + } /* Make sure the output buffer is big enough */ @@ -469,11 +543,6 @@ char *html_to_ascii(char *inputmsg, int msglen, int screenwidth, int do_citaform strncpy(&outptr[output_len], outbuf, i+1); output_len += (i+1); - if (do_citaformat) { - strcpy(&outptr[output_len], " "); - ++output_len; - } - strcpy(outbuf, &outbuf[i+1]); i = 0; did_out = 1; @@ -493,10 +562,6 @@ char *html_to_ascii(char *inputmsg, int msglen, int screenwidth, int do_citaform output_len += rb; strcpy(&outptr[output_len], nl); output_len += strlen(nl); - if (do_citaformat) { - strcpy(&outptr[output_len], " "); - ++output_len; - } strcpy(outbuf, &outbuf[rb+1]); } else { strncpy(&outptr[output_len], outbuf, @@ -504,10 +569,6 @@ char *html_to_ascii(char *inputmsg, int msglen, int screenwidth, int do_citaform output_len += (screenwidth-2); strcpy(&outptr[output_len], nl); output_len += strlen(nl); - if (do_citaformat) { - strcpy(&outptr[output_len], " "); - ++output_len; - } strcpy(outbuf, &outbuf[screenwidth-2]); } } @@ -529,7 +590,7 @@ char *html_to_ascii(char *inputmsg, int msglen, int screenwidth, int do_citaform --output_len; } - if (outptr[output_len-1] != '\n') { + if ((output_len > 0) && (outptr[output_len-1] != '\n')) { strcat(outptr, "\n"); ++output_len; }