X-Git-Url: https://code.citadel.org/?a=blobdiff_plain;f=libcitadel%2Flib%2Fhtml_to_ascii.c;h=45beb2cbb079b97eeaa2041df905e8676ce3145a;hb=21185d642546b081924a541bad2adcaad9416fca;hp=ccf074a48574e8dbb4b15e71cb38da1d15eae401;hpb=56d69e5d8434e98835a2582c59b771ba69475431;p=citadel.git diff --git a/libcitadel/lib/html_to_ascii.c b/libcitadel/lib/html_to_ascii.c index ccf074a48..45beb2cbb 100644 --- a/libcitadel/lib/html_to_ascii.c +++ b/libcitadel/lib/html_to_ascii.c @@ -1,8 +1,8 @@ /* * Functions which handle translation between HTML and plain text - * Copyright (c) 2000-2010 by the citadel.org team + * Copyright (c) 2000-2018 by the citadel.org team * - * This program is free software; you can redistribute it and/or modify + * This program is open source software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. @@ -47,15 +47,14 @@ * * inputmsg = pointer to raw HTML message * screenwidth = desired output screenwidth - * do_citaformat = set to 1 to indent newlines with spaces */ -char *html_to_ascii(char *inputmsg, int msglen, int screenwidth, int do_citaformat) { +char *html_to_ascii(const char *inputmsg, int msglen, int screenwidth) { char inbuf[SIZ]; int inbuf_len = 0; char outbuf[SIZ]; char tag[1024]; int done_reading = 0; - char *inptr; + const char *inptr; char *outptr; size_t outptr_buffer_size; size_t output_len = 0; @@ -67,6 +66,7 @@ char *html_to_ascii(char *inputmsg, int msglen, int screenwidth, int do_citaform int bytes_processed = 0; char nl[128]; + tag[0] = '\0'; strcpy(nl, "\n"); inptr = inputmsg; strcpy(inbuf, ""); @@ -197,6 +197,9 @@ char *html_to_ascii(char *inputmsg, int msglen, int screenwidth, int do_citaform strcat(outbuf, nl); } +#if 0 + These seemed like a good idea at the time, but it just makes a mess. 
+ else if ( (!strcasecmp(tag, "B")) || (!strcasecmp(tag, "/B")) @@ -204,7 +207,6 @@ char *html_to_ascii(char *inputmsg, int msglen, int screenwidth, int do_citaform || (!strcasecmp(tag, "/STRONG")) ) { strcat(outbuf, "*"); - } else if ( @@ -214,7 +216,6 @@ char *html_to_ascii(char *inputmsg, int msglen, int screenwidth, int do_citaform || (!strcasecmp(tag, "/EM")) ) { strcat(outbuf, "/"); - } else if ( @@ -222,8 +223,8 @@ char *html_to_ascii(char *inputmsg, int msglen, int screenwidth, int do_citaform || (!strcasecmp(tag, "/U")) ) { strcat(outbuf, "_"); - } +#endif else if (!strcasecmp(tag, "BR")) { strcat(outbuf, nl); @@ -273,7 +274,7 @@ char *html_to_ascii(char *inputmsg, int msglen, int screenwidth, int do_citaform tag[strlen(tag)] = ch; } - else if (!nest) { + else if ((!nest) && (styletag == 0)) { outbuf[strlen(outbuf)+1] = 0; outbuf[strlen(outbuf)] = ch; } @@ -469,9 +470,23 @@ char *html_to_ascii(char *inputmsg, int msglen, int screenwidth, int do_citaform strcpy(&outbuf[i+1], &outbuf[i+7]); } + else if (!strncasecmp(&outbuf[i], "&rsquo;", 7)) { + outbuf[i] = '\''; + strcpy(&outbuf[i+1], &outbuf[i+7]); + } + + else if (!strncasecmp(&outbuf[i], "&ndash;", 7)) { + outbuf[i] = '-'; + strcpy(&outbuf[i+1], &outbuf[i+7]); + } + /* two-digit decimal equivalents */ - else if ((!strncmp(&outbuf[i], "&#", 2)) - && (outbuf[i+4] == ';') ) { + else if (outbuf[i] == '&' && + outbuf[i + 1] == '#' && + isdigit(outbuf[i + 2]) && + isdigit(outbuf[i + 3]) && + (outbuf[i+4] == ';') ) + { scanch = 0; sscanf(&outbuf[i+2], "%02d", &scanch); outbuf[i] = scanch; @@ -479,14 +494,34 @@ char *html_to_ascii(char *inputmsg, int msglen, int screenwidth, int do_citaform } /* three-digit decimal equivalents */ - else if ((!strncmp(&outbuf[i], "&#", 2)) - && (outbuf[i+5] == ';') ) { + else if (outbuf[i] == '&' && + outbuf[i + 1] == '#' && + isdigit(outbuf[i + 2]) && + isdigit(outbuf[i + 3]) && + isdigit(outbuf[i + 4]) && + (outbuf[i + 5] == ';') ) + { scanch = 0; sscanf(&outbuf[i+2], "%03d", &scanch); 
outbuf[i] = scanch; strcpy(&outbuf[i+1], &outbuf[i+6]); } + /* four-digit decimal equivalents */ + else if (outbuf[i] == '&' && + outbuf[i + 1] == '#' && + isdigit(outbuf[i + 2]) && + isdigit(outbuf[i + 3]) && + isdigit(outbuf[i + 4]) && + isdigit(outbuf[i + 5]) && + (outbuf[i + 6] == ';') ) + { + scanch = 0; + sscanf(&outbuf[i+2], "%04d", &scanch); + outbuf[i] = scanch; + strcpy(&outbuf[i+1], &outbuf[i+7]); + } + } /* Make sure the output buffer is big enough */ @@ -508,11 +543,6 @@ char *html_to_ascii(char *inputmsg, int msglen, int screenwidth, int do_citaform strncpy(&outptr[output_len], outbuf, i+1); output_len += (i+1); - if (do_citaformat) { - strcpy(&outptr[output_len], " "); - ++output_len; - } - strcpy(outbuf, &outbuf[i+1]); i = 0; did_out = 1; @@ -532,10 +562,6 @@ char *html_to_ascii(char *inputmsg, int msglen, int screenwidth, int do_citaform output_len += rb; strcpy(&outptr[output_len], nl); output_len += strlen(nl); - if (do_citaformat) { - strcpy(&outptr[output_len], " "); - ++output_len; - } strcpy(outbuf, &outbuf[rb+1]); } else { strncpy(&outptr[output_len], outbuf, @@ -543,10 +569,6 @@ char *html_to_ascii(char *inputmsg, int msglen, int screenwidth, int do_citaform output_len += (screenwidth-2); strcpy(&outptr[output_len], nl); output_len += strlen(nl); - if (do_citaformat) { - strcpy(&outptr[output_len], " "); - ++output_len; - } strcpy(outbuf, &outbuf[screenwidth-2]); } }