From 4bb58f061555ed9770803727c1408ede497deb32 Mon Sep 17 00:00:00 2001 From: Art Cancro Date: Thu, 28 Feb 2008 17:28:52 +0000 Subject: [PATCH] html_to_ascii() moved into libcitadel --- citadel/Makefile.in | 2 +- citadel/citadel.h | 2 +- citadel/html.h | 6 - citadel/journaling.c | 1 - citadel/messages.c | 1 - citadel/msgbase.c | 1 - libcitadel/Makefile.in | 4 +- .../html.c => libcitadel/lib/html_to_ascii.c | 25 +- libcitadel/lib/libcitadel.h | 3 +- webcit/Makefile.in | 4 +- webcit/html.c | 520 ------------------ webcit/webcit.h | 3 +- 12 files changed, 18 insertions(+), 554 deletions(-) delete mode 100644 citadel/html.h rename citadel/html.c => libcitadel/lib/html_to_ascii.c (97%) delete mode 100644 webcit/html.c diff --git a/citadel/Makefile.in b/citadel/Makefile.in index 0a2b3db0b..1897e0f57 100644 --- a/citadel/Makefile.in +++ b/citadel/Makefile.in @@ -72,7 +72,7 @@ SOURCES=aidepost.c auth.c base64.c chkpwd.c chkpw.c citadel.c citadel_ipc.c \ citmail.c citserver.c client_chat.c client_passwords.c \ clientsocket.c commands.c config.c control.c $(DATABASE) \ domain.c serv_extensions.c file_ops.c genstamp.c getutline.c \ - housekeeping.c html.c ical_dezonify.c \ + housekeeping.c ical_dezonify.c \ internet_addressing.c ecrash.c \ ipc_c_tcp.c locate_host.c md5.c messages.c \ msgbase.c msgform.c parsedate.c policy.c \ diff --git a/citadel/citadel.h b/citadel/citadel.h index 9d380de42..4cbe92b60 100644 --- a/citadel/citadel.h +++ b/citadel/citadel.h @@ -50,7 +50,7 @@ extern "C" { #define REV_LEVEL 732 /* This version */ #define REV_MIN 591 /* Oldest compatible database */ #define EXPORT_REV_MIN 725 /* Oldest compatible export files */ -#define LIBCITADEL_MIN 105 /* Minimum required version of libcitadel */ +#define LIBCITADEL_MIN 107 /* Minimum required version of libcitadel */ #define SERVER_TYPE 0 /* zero for stock Citadel; other developers please obtain SERVER_TYPE codes for your implementations */ diff --git a/citadel/html.h b/citadel/html.h deleted file mode 100644 index 7e23ff301..000000000 --- a/citadel/html.h +++ /dev/null @@ -1,6 +0,0 @@ -/* - * $Id$ - * - */ - -char *html_to_ascii(char *inputmsg, int msglen, int screenwidth, int do_citaformat); diff --git a/citadel/journaling.c b/citadel/journaling.c index b3c79e0ab..9fc8246ea 100644 --- a/citadel/journaling.c +++ b/citadel/journaling.c @@ -42,7 +42,6 @@ #include "file_ops.h" #include "config.h" #include "control.h" -#include "html.h" #include "genstamp.h" #include "internet_addressing.h" #include "serv_vcard.h" /* Needed for vcard_getuser and extract_inet_email_addrs */ diff --git a/citadel/messages.c b/citadel/messages.c index 37a2769a3..adc1d13b6 100644 --- a/citadel/messages.c +++ b/citadel/messages.c @@ -42,7 +42,6 @@ #include "messages.h" #include "commands.h" #include "rooms.h" -#include "html.h" #ifndef HAVE_SNPRINTF #include "snprintf.h" #endif diff --git a/citadel/msgbase.c b/citadel/msgbase.c index da1498f2b..10ec60887 100644 --- a/citadel/msgbase.c +++ b/citadel/msgbase.c @@ -45,7 +45,6 @@ #include "file_ops.h" #include "config.h" #include "control.h" -#include "html.h" #include "genstamp.h" #include "internet_addressing.h" #include "euidindex.h" diff --git a/libcitadel/Makefile.in b/libcitadel/Makefile.in index 4f9e76c45..037a56a64 100755 --- a/libcitadel/Makefile.in +++ b/libcitadel/Makefile.in @@ -113,7 +113,8 @@ LIB_OBJS = lib/libcitadel.lo \ lib/xdgmime/xdgmimemagic.lo \ lib/xdgmime/xdgmimealias.lo \ lib/xdgmime/xdgmimeparent.lo \ - lib/xdgmime/xdgmimecache.lo + lib/xdgmime/xdgmimecache.lo \ + lib/html_to_ascii.lo $(LIBRARY): $(LIB_OBJS) $(LINK_LIB) $(LIB_OBJS) @@ -131,6 +132,7 @@ lib/xdgmime/xdgmimemagic.lo: lib/xdgmime/xdgmimemagic.c lib/xdgmime/xdgmimealias.lo: lib/xdgmime/xdgmimealias.c lib/xdgmime/xdgmimeparent.lo: lib/xdgmime/xdgmimeparent.c lib/xdgmime/xdgmimecache.lo: lib/xdgmime/xdgmimecache.c +lib/html_to_ascii.lo: lib/html_to_ascii.c .SUFFIXES: .c .cpp .lo .o diff --git a/citadel/html.c b/libcitadel/lib/html_to_ascii.c similarity index 97% rename from citadel/html.c rename to libcitadel/lib/html_to_ascii.c index 9795a0c8b..e7d7fc5dd 100644 --- a/citadel/html.c +++ b/libcitadel/lib/html_to_ascii.c @@ -1,17 +1,21 @@ /* - * $Id$ + * $Id: html.c 6014 2008-02-04 18:38:35Z ajc $ * * Functions which handle translation between HTML and plain text * Copyright (c) 2000-2005 by Art Cancro and others. This program is * released under the terms of the GNU General Public License. */ -#include "sysdep.h" #include #include #include -#include #include +#include +#include +#include +#include +#include +#include #if TIME_WITH_SYS_TIME # include @@ -24,20 +28,7 @@ # endif #endif -#include -#include -#include -#include -#include -#include "citadel.h" -#include "server.h" -#include "control.h" -#include "sysdep_decls.h" -#include "support.h" -#include "config.h" -#include "msgbase.h" -#include "room_ops.h" -#include "html.h" +#include "libcitadel.h" /* diff --git a/libcitadel/lib/libcitadel.h b/libcitadel/lib/libcitadel.h index 5d8040950..83f69be4c 100644 --- a/libcitadel/lib/libcitadel.h +++ b/libcitadel/lib/libcitadel.h @@ -10,7 +10,7 @@ */ #include #include -#define LIBCITADEL_VERSION_NUMBER 106 +#define LIBCITADEL_VERSION_NUMBER 107 /* * Here's a bunch of stupid magic to make the MIME parser portable. @@ -221,6 +221,7 @@ char *rfc2047encode(char *line, long length); int is_msg_in_mset(char *mset, long msgnum); int pattern2(char *search, char *patn); void stripltlen(char *, int *); +char *html_to_ascii(char *inputmsg, int msglen, int screenwidth, int do_citaformat); diff --git a/webcit/Makefile.in b/webcit/Makefile.in index 59bf665c4..6b2a321b9 100644 --- a/webcit/Makefile.in +++ b/webcit/Makefile.in @@ -51,7 +51,7 @@ webserver: webserver.o context_loop.o ical_dezonify.o \ groupdav_main.o groupdav_get.o groupdav_propfind.o fmt_date.o \ groupdav_options.o autocompletion.o gettext.o tabs.o sieve.o \ groupdav_delete.o groupdav_put.o http_datestring.o setup_wizard.o \ - downloads.o addressbook_popup.o pushemail.o sysdep.o html.o \ + downloads.o addressbook_popup.o pushemail.o sysdep.o \ $(LIBOBJS) $(CC) webserver.o context_loop.o cookie_conversion.o \ webcit.o auth.o tcp_sockets.o mainmenu.o serv_func.o who.o listsub.o \ @@ -63,7 +63,7 @@ webserver: webserver.o context_loop.o ical_dezonify.o \ groupdav_main.o groupdav_get.o groupdav_propfind.o groupdav_delete.o \ groupdav_options.o autocompletion.o tabs.o smtpqueue.o sieve.o \ groupdav_put.o http_datestring.o setup_wizard.o fmt_date.o \ - gettext.o downloads.o addressbook_popup.o pushemail.o sysdep.o html.o \ + gettext.o downloads.o addressbook_popup.o pushemail.o sysdep.o \ $(LIBOBJS) $(LIBS) $(LDFLAGS) -o webserver .c.o: diff --git a/webcit/html.c b/webcit/html.c deleted file mode 100644 index 034e08017..000000000 --- a/webcit/html.c +++ /dev/null @@ -1,520 +0,0 @@ -/* - * $Id: html.c 6014 2008-02-04 18:38:35Z ajc $ - * - * Functions which handle translation between HTML and plain text - * Copyright (c) 2000-2005 by Art Cancro and others. This program is - * released under the terms of the GNU General Public License. - */ - -#include "webcit.h" - - -/* - * Convert HTML to plain text. - * - * inputmsg = pointer to raw HTML message - * screenwidth = desired output screenwidth - * do_citaformat = set to 1 to indent newlines with spaces - */ -char *html_to_ascii(char *inputmsg, int msglen, int screenwidth, int do_citaformat) { - char inbuf[SIZ]; - int inbuf_len = 0; - char outbuf[SIZ]; - char tag[1024]; - int done_reading = 0; - char *inptr; - char *outptr; - size_t outptr_buffer_size; - size_t output_len = 0; - int i, j, ch, did_out, rb, scanch; - int nest = 0; /* Bracket nesting level */ - int blockquote = 0; /* BLOCKQUOTE nesting level */ - int styletag = 0; /* STYLE tag nesting level */ - int styletag_start = 0; - int bytes_processed = 0; - char nl[128]; - - strcpy(nl, "\n"); - inptr = inputmsg; - strcpy(inbuf, ""); - strcpy(outbuf, ""); - if (msglen == 0) msglen = strlen(inputmsg); - - outptr_buffer_size = strlen(inptr) + SIZ; - outptr = malloc(outptr_buffer_size); - if (outptr == NULL) return NULL; - strcpy(outptr, ""); - output_len = 0; - - do { - /* Fill the input buffer */ - inbuf_len = strlen(inbuf); - if ( (done_reading == 0) && (inbuf_len < (SIZ-128)) ) { - - ch = *inptr++; - if (ch != 0) { - inbuf[inbuf_len++] = ch; - inbuf[inbuf_len] = 0; - } - else { - done_reading = 1; - } - - ++bytes_processed; - if (bytes_processed > msglen) { - done_reading = 1; - } - - } - - /* Do some parsing */ - if (!IsEmptyStr(inbuf)) { - - - /* Fold in all the spacing */ - for (i=0; !IsEmptyStr(&inbuf[i]); ++i) { - if (inbuf[i]==10) inbuf[i]=32; - if (inbuf[i]==13) inbuf[i]=32; - if (inbuf[i]==9) inbuf[i]=32; - /*** we like foreign characters now. - if ((inbuf[i]<32) || (inbuf[i]>126)) { - inbuf[i] = '?'; - } */ - } - for (i=0; !IsEmptyStr(&inbuf[i]); ++i) { - while ((inbuf[i]==32)&&(inbuf[i+1]==32)) - strcpy(&inbuf[i], &inbuf[i+1]); - } - - for (i=0; !IsEmptyStr(&inbuf[i]); ++i) { - - ch = inbuf[i]; - - if (ch == '<') { - ++nest; - strcpy(tag, ""); - } - - else if (ch == '>') { /* We have a tag. */ - if (nest > 0) --nest; - - /* Unqualify the tag (truncate at first space) */ - if (strchr(tag, ' ') != NULL) { - strcpy(strchr(tag, ' '), ""); - } - - if (!strcasecmp(tag, "P")) { - strcat(outbuf, nl); - strcat(outbuf, nl); - } - - if (!strcasecmp(tag, "/DIV")) { - strcat(outbuf, nl); - strcat(outbuf, nl); - } - - if (!strcasecmp(tag, "LI")) { - strcat(outbuf, nl); - strcat(outbuf, " * "); - } - - else if (!strcasecmp(tag, "/UL")) { - strcat(outbuf, nl); - strcat(outbuf, nl); - } - - else if (!strcasecmp(tag, "H1")) { - strcat(outbuf, nl); - strcat(outbuf, nl); - } - - else if (!strcasecmp(tag, "H2")) { - strcat(outbuf, nl); - strcat(outbuf, nl); - } - - else if (!strcasecmp(tag, "H3")) { - strcat(outbuf, nl); - strcat(outbuf, nl); - } - - else if (!strcasecmp(tag, "H4")) { - strcat(outbuf, nl); - strcat(outbuf, nl); - } - - else if (!strcasecmp(tag, "/H1")) { - strcat(outbuf, nl); - } - - else if (!strcasecmp(tag, "/H2")) { - strcat(outbuf, nl); - } - - else if (!strcasecmp(tag, "/H3")) { - strcat(outbuf, nl); - } - - else if (!strcasecmp(tag, "/H4")) { - strcat(outbuf, nl); - } - - else if (!strcasecmp(tag, "HR")) { - strcat(outbuf, nl); - strcat(outbuf, " "); - for (j=0; j"); - strcat(outbuf, nl); - } - - else if (!strcasecmp(tag, "/BLOCKQUOTE")) { - strcat(outbuf, "\n"); - --blockquote; - strcpy(nl, "\n"); - for (j=0; j"); - strcat(outbuf, nl); - } - - else if (!strcasecmp(tag, "STYLE")) { - ++styletag; - if (styletag == 1) { - styletag_start = strlen(outbuf); - } - } - - else if (!strcasecmp(tag, "/STYLE")) { - --styletag; - if (styletag == 0) { - outbuf[styletag_start] = 0; - } - } - - } - - else if ((nest > 0) && (strlen(tag)<(sizeof(tag)-1))) { - tag[strlen(tag)+1] = 0; - tag[strlen(tag)] = ch; - } - - else if (!nest) { - outbuf[strlen(outbuf)+1] = 0; - outbuf[strlen(outbuf)] = ch; - } - } - strcpy(inbuf, &inbuf[i]); - } - - /* Convert &; tags to the forbidden characters */ - if (!IsEmptyStr(outbuf)) for (i=0; !IsEmptyStr(&outbuf[i]); ++i) { - - /* Character entity references */ - if (!strncasecmp(&outbuf[i], " ", 6)) { - outbuf[i] = ' '; - strcpy(&outbuf[i+1], &outbuf[i+6]); - } - - if (!strncasecmp(&outbuf[i], " ", 6)) { - outbuf[i] = ' '; - strcpy(&outbuf[i+1], &outbuf[i+6]); - } - - if (!strncasecmp(&outbuf[i], " ", 6)) { - outbuf[i] = ' '; - strcpy(&outbuf[i+1], &outbuf[i+6]); - } - - if (!strncasecmp(&outbuf[i], " ", 8)) { - outbuf[i] = ' '; - strcpy(&outbuf[i+1], &outbuf[i+8]); - } - - else if (!strncasecmp(&outbuf[i], "<", 4)) { - outbuf[i] = '<'; - strcpy(&outbuf[i+1], &outbuf[i+4]); - } - - else if (!strncasecmp(&outbuf[i], ">", 4)) { - outbuf[i] = '>'; - strcpy(&outbuf[i+1], &outbuf[i+4]); - } - - else if (!strncasecmp(&outbuf[i], "&", 5)) { - strcpy(&outbuf[i+1], &outbuf[i+5]); - } - - else if (!strncasecmp(&outbuf[i], """, 6)) { - outbuf[i] = '\"'; - strcpy(&outbuf[i+1], &outbuf[i+6]); - } - - else if (!strncasecmp(&outbuf[i], "‘", 7)) { - outbuf[i] = '`'; - strcpy(&outbuf[i+1], &outbuf[i+7]); - } - - else if (!strncasecmp(&outbuf[i], "’", 7)) { - outbuf[i] = '\''; - strcpy(&outbuf[i+1], &outbuf[i+7]); - } - - else if (!strncasecmp(&outbuf[i], "©", 6)) { - outbuf[i] = '('; - outbuf[i+1] = 'c'; - outbuf[i+2] = ')'; - strcpy(&outbuf[i+3], &outbuf[i+6]); - } - - else if (!strncasecmp(&outbuf[i], "•", 6)) { - outbuf[i] = ' '; - outbuf[i+1] = '*'; - outbuf[i+2] = ' '; - strcpy(&outbuf[i+3], &outbuf[i+6]); - } - - else if (!strncasecmp(&outbuf[i], "…", 8)) { - outbuf[i] = '.'; - outbuf[i+1] = '.'; - outbuf[i+2] = '.'; - strcpy(&outbuf[i+3], &outbuf[i+8]); - } - - else if (!strncasecmp(&outbuf[i], "™", 7)) { - outbuf[i] = '('; - outbuf[i+1] = 't'; - outbuf[i+2] = 'm'; - outbuf[i+3] = ')'; - strcpy(&outbuf[i+4], &outbuf[i+7]); - } - - else if (!strncasecmp(&outbuf[i], "®", 5)) { - outbuf[i] = '('; - outbuf[i+1] = 'r'; - outbuf[i+2] = ')'; - strcpy(&outbuf[i+3], &outbuf[i+5]); - } - - else if (!strncasecmp(&outbuf[i], "¼", 8)) { - outbuf[i] = '1'; - outbuf[i+1] = '/'; - outbuf[i+2] = '4'; - strcpy(&outbuf[i+3], &outbuf[i+8]); - } - - else if (!strncasecmp(&outbuf[i], "½", 8)) { - outbuf[i] = '1'; - outbuf[i+1] = '/'; - outbuf[i+2] = '2'; - strcpy(&outbuf[i+3], &outbuf[i+8]); - } - - else if (!strncasecmp(&outbuf[i], "¾", 8)) { - outbuf[i] = '3'; - outbuf[i+1] = '/'; - outbuf[i+2] = '4'; - strcpy(&outbuf[i+3], &outbuf[i+8]); - } - - else if (!strncasecmp(&outbuf[i], "–", 7)) { - outbuf[i] = '-'; - outbuf[i+1] = '-'; - strcpy(&outbuf[i+2], &outbuf[i+7]); - } - - else if (!strncasecmp(&outbuf[i], "—", 7)) { - outbuf[i] = '-'; - outbuf[i+1] = '-'; - outbuf[i+2] = '-'; - strcpy(&outbuf[i+3], &outbuf[i+7]); - } - - else if (!strncmp(&outbuf[i], "Ç", 8)) { - outbuf[i] = 'C'; - strcpy(&outbuf[i+1], &outbuf[i+8]); - } - - else if (!strncasecmp(&outbuf[i], "ç", 8)) { - outbuf[i] = 'c'; - strcpy(&outbuf[i+1], &outbuf[i+8]); - } - - else if (!strncmp(&outbuf[i], "È", 8)) { - outbuf[i] = 'E'; - strcpy(&outbuf[i+1], &outbuf[i+8]); - } - - else if (!strncasecmp(&outbuf[i], "è", 8)) { - outbuf[i] = 'e'; - strcpy(&outbuf[i+1], &outbuf[i+8]); - } - - else if (!strncmp(&outbuf[i], "Ê", 7)) { - outbuf[i] = 'E'; - strcpy(&outbuf[i+1], &outbuf[i+7]); - } - - else if (!strncasecmp(&outbuf[i], "ê", 7)) { - outbuf[i] = 'e'; - strcpy(&outbuf[i+1], &outbuf[i+7]); - } - - else if (!strncmp(&outbuf[i], "É", 8)) { - outbuf[i] = 'E'; - strcpy(&outbuf[i+1], &outbuf[i+8]); - } - - else if (!strncasecmp(&outbuf[i], "é", 8)) { - outbuf[i] = 'e'; - strcpy(&outbuf[i+1], &outbuf[i+8]); - } - - else if (!strncmp(&outbuf[i], "À", 8)) { - outbuf[i] = 'A'; - strcpy(&outbuf[i+1], &outbuf[i+8]); - } - - else if (!strncasecmp(&outbuf[i], "à", 8)) { - outbuf[i] = 'a'; - strcpy(&outbuf[i+1], &outbuf[i+8]); - } - - else if (!strncasecmp(&outbuf[i], "“", 7)) { - outbuf[i] = '\"'; - strcpy(&outbuf[i+1], &outbuf[i+7]); - } - - else if (!strncasecmp(&outbuf[i], "”", 7)) { - outbuf[i] = '\"'; - strcpy(&outbuf[i+1], &outbuf[i+7]); - } - - else if (!strncasecmp(&outbuf[i], "´", 7)) { - outbuf[i] = '\''; - strcpy(&outbuf[i+1], &outbuf[i+7]); - } - - /* two-digit decimal equivalents */ - else if ((!strncmp(&outbuf[i], "&#", 2)) - && (outbuf[i+4] == ';') ) { - scanch = 0; - sscanf(&outbuf[i+2], "%02d", &scanch); - outbuf[i] = scanch; - strcpy(&outbuf[i+1], &outbuf[i+5]); - } - - /* three-digit decimal equivalents */ - else if ((!strncmp(&outbuf[i], "&#", 2)) - && (outbuf[i+5] == ';') ) { - scanch = 0; - sscanf(&outbuf[i+2], "%03d", &scanch); - outbuf[i] = scanch; - strcpy(&outbuf[i+1], &outbuf[i+6]); - } - - } - - /* Make sure the output buffer is big enough */ - if ((output_len + strlen(outbuf) + SIZ) > outptr_buffer_size) { - outptr_buffer_size += SIZ; - outptr = realloc(outptr, outptr_buffer_size); - if (outptr == NULL) { - abort(); - } - } - - /* Output any lines terminated with hard line breaks */ - do { - did_out = 0; - if (strlen(outbuf) > 0) { - for (i = 0; i (screenwidth - 2 )) { - rb = (-1); - for (i=0; i<(screenwidth-2); ++i) { - if (outbuf[i]==32) rb = i; - } - if (rb>=0) { - strncpy(&outptr[output_len], outbuf, rb); - output_len += rb; - strcpy(&outptr[output_len], nl); - output_len += strlen(nl); - if (do_citaformat) { - strcpy(&outptr[output_len], " "); - ++output_len; - } - strcpy(outbuf, &outbuf[rb+1]); - } else { - strncpy(&outptr[output_len], outbuf, - screenwidth-2); - output_len += (screenwidth-2); - strcpy(&outptr[output_len], nl); - output_len += strlen(nl); - if (do_citaformat) { - strcpy(&outptr[output_len], " "); - ++output_len; - } - strcpy(outbuf, &outbuf[screenwidth-2]); - } - } - - } while (done_reading == 0); - - strcpy(&outptr[output_len], outbuf); - output_len += strlen(outbuf); - - /* Strip leading/trailing whitespace. We can't do this with - * striplt() because it uses too many strlen()'s - */ - while ((output_len > 0) && (isspace(outptr[0]))) { - strcpy(outptr, &outptr[1]); - --output_len; - } - while ((output_len > 0) && (isspace(outptr[output_len-1]))) { - outptr[output_len-1] = 0; - --output_len; - } - - if (outptr[output_len-1] != '\n') { - strcat(outptr, "\n"); - ++output_len; - } - - return outptr; - -} diff --git a/webcit/webcit.h b/webcit/webcit.h index d90c51817..5cd36956c 100644 --- a/webcit/webcit.h +++ b/webcit/webcit.h @@ -126,7 +126,7 @@ extern locale_t wc_locales[]; #define CLIENT_ID 4 #define CLIENT_VERSION 730 /* This version of WebCit */ #define MINIMUM_CIT_VERSION 730 /* min required Citadel ver */ -#define LIBCITADEL_MIN 106 /* min required libcitadel ver */ +#define LIBCITADEL_MIN 107 /* min required libcitadel ver */ #define DEFAULT_HOST "localhost" /* Default Citadel server */ #define DEFAULT_PORT "504" #define LB (1) /* Internal escape chars */ @@ -759,7 +759,6 @@ void display_wiki_page(void); int get_time_format_cached (void); int xtoi(char *in, size_t len); void webcit_fmt_date(char *buf, time_t thetime, int brief); -char *html_to_ascii(char *inputmsg, int msglen, int screenwidth, int do_citaformat); #ifdef HAVE_ICONV -- 2.30.2