]> code.citadel.org Git - citadel.git/blob - citadel/html.c
* Made some changes to the output of MIME (especially multipart) messages.
[citadel.git] / citadel / html.c
1 /*
2  * html.c -- Functions which handle translation between HTML and plain text
3  * $Id$
4  */
5
6 #include "sysdep.h"
7 #include <stdlib.h>
8 #include <unistd.h>
9 #include <stdio.h>
10 #include <fcntl.h>
11 #include <signal.h>
12 #include <time.h>
13 #include <ctype.h>
14 #include <string.h>
15 #include <errno.h>
16 #include <limits.h>
17 #ifdef HAVE_PTHREAD_H
18 #include <pthread.h>
19 #endif
20 #include <syslog.h>
21 #include "citadel.h"
22 #include "server.h"
23 #include "control.h"
24 #include "sysdep_decls.h"
25 #include "support.h"
26 #include "config.h"
27 #include "msgbase.h"
28 #include "tools.h"
29 #include "room_ops.h"
30 #include "html.h"
31  
32
/*
 * Growable, NUL-terminated byte string used internally by html_to_ascii().
 */
struct h2a_str {
	char *buf;		/* heap storage; NULL until the first append */
	size_t len;		/* number of bytes before the terminating NUL */
	size_t cap;		/* number of bytes allocated for buf */
	size_t decoded;		/* leading bytes that already went through
				 * entity decoding and must not be decoded
				 * a second time */
	int failed;		/* set once an allocation has failed */
};

/*
 * Append n bytes from src to s, growing the storage as needed.
 * On allocation failure s is left unchanged and s->failed is set; every
 * later append on the same string is then a no-op.
 */
static void h2a_append(struct h2a_str *s, const char *src, size_t n)
{
	size_t need, newcap;
	char *tmp;

	if (s->failed) return;
	if (n >= (size_t)-1 - s->len) {		/* len + n + 1 would wrap */
		s->failed = 1;
		return;
	}
	need = s->len + n + 1;
	if (need > s->cap) {
		newcap = (s->cap > 0) ? s->cap : 256;
		while (newcap < need) {
			if (newcap > (size_t)-1 / 2) {
				newcap = need;
				break;
			}
			newcap *= 2;
		}
		/* Keep the old pointer until we know realloc() succeeded */
		tmp = realloc(s->buf, newcap);
		if (tmp == NULL) {
			s->failed = 1;
			return;
		}
		s->buf = tmp;
		s->cap = newcap;
	}
	memcpy(s->buf + s->len, src, n);
	s->len += n;
	s->buf[s->len] = 0;
}

/*
 * Drop the first n bytes of s.  s->buf must already be allocated.
 * memmove() is used because source and destination overlap.
 */
static void h2a_consume(struct h2a_str *s, size_t n)
{
	if (n > s->len) n = s->len;
	memmove(s->buf, s->buf + n, s->len - n + 1);
	s->len -= n;
	s->decoded = (s->decoded > n) ? (s->decoded - n) : 0;
}

/*
 * Called right after a ';' was appended to s: if s now ends in one of the
 * known "&...;" entities (matched without regard to case), replace that
 * entity with its plain-text form.  Text produced by an earlier replacement
 * is never treated as the start of a new entity, so "&amp;lt;" becomes
 * "&lt;" rather than "<".
 */
static void h2a_decode_tail_entity(struct h2a_str *s)
{
	static const struct {
		const char *name;
		const char *text;
	} entities[] = {
		{ "&nbsp;", " " },
		{ "&lt;", "<" },
		{ "&gt;", ">" },
		{ "&amp;", "&" },
		{ "&quot;", "\"" },
		{ "&copy;", "(c)" },
		{ "&reg;", "(r)" }
	};
	size_t k, nlen, start;

	if (s->failed) return;

	for (k = 0; k < sizeof entities / sizeof entities[0]; ++k) {
		nlen = strlen(entities[k].name);
		if (nlen > s->len) continue;
		start = s->len - nlen;
		if (start < s->decoded) continue;
		if (strncasecmp(s->buf + start, entities[k].name, nlen) != 0)
			continue;

		s->len = start;
		s->buf[start] = 0;
		h2a_append(s, entities[k].text, strlen(entities[k].text));
		s->decoded = s->len;
		return;
	}
}

/*
 * Append the line breaks (or the horizontal rule) that the tag named by
 * "tag" stands for.  Names are compared whole and without regard to case;
 * tags that are not listed produce no output.
 */
static void h2a_emit_tag_break(struct h2a_str *s, const char *tag,
				size_t rule_width)
{
	static const struct {
		const char *name;
		const char *text;
	} breaks[] = {
		{ "P", "\n\n" },
		{ "/DIV", "\n\n" },
		{ "H1", "\n\n" },
		{ "H2", "\n\n" },
		{ "H3", "\n\n" },
		{ "H4", "\n\n" },
		{ "/H1", "\n" },
		{ "/H2", "\n" },
		{ "/H3", "\n" },
		{ "/H4", "\n" },
		{ "BR", "\n" },
		{ "TR", "\n" },
		{ "/TABLE", "\n" }
	};
	size_t k;

	if (!strcasecmp(tag, "HR")) {
		h2a_append(s, "\n ", 2);
		for (k = 0; k < rule_width; ++k)
			h2a_append(s, "-", 1);
		h2a_append(s, "\n", 1);
		return;
	}

	for (k = 0; k < sizeof breaks / sizeof breaks[0]; ++k) {
		if (!strcasecmp(tag, breaks[k].name)) {
			h2a_append(s, breaks[k].text, strlen(breaks[k].text));
			return;
		}
	}
}

/*
 * Convert HTML to plain text.
 *
 * inputmsg      = pointer to raw HTML message (NUL-terminated)
 * screenwidth   = desired output screenwidth; lines are wrapped at
 *                 screenwidth-2 columns.  Values below 3 are treated as 3.
 * do_citaformat = set to 1 to indent newlines with spaces
 *
 * The input is consumed one byte at a time:
 *   - tab, CR and LF become a space; every other byte outside the
 *     printable ASCII range 32..126 is dropped;
 *   - text between '<' and '>' is collected as a tag name and not copied
 *     to the output; a handful of tags (P, /DIV, H1-H4, /H1-/H4, HR, BR,
 *     TR, /TABLE) are turned into line breaks or a rule of dashes;
 *   - the entities &nbsp; &lt; &gt; &amp; &quot; &copy; &reg; are decoded;
 *   - a line ending in a hard break is written out followed by one more
 *     newline; over-long lines are broken at the last space inside the
 *     wrap width, or at the wrap width itself if there is no space.
 * The tag name is only reset by '<', so a stray '>' repeats the break of
 * the most recent tag.
 *
 * Returns a newly allocated, NUL-terminated string obtained from mallok(),
 * always ending in a newline, or NULL if inputmsg is NULL or memory could
 * not be allocated.  The caller owns the returned block.
 * NOTE(review): mallok() is declared outside this file; callers presumably
 * release the result with its matching deallocator -- confirm.
 */
char *html_to_ascii(char *inputmsg, int screenwidth, int do_citaformat) {
	struct h2a_str out = { NULL, 0, 0, 0, 0 };	/* finished text */
	struct h2a_str line = { NULL, 0, 0, 0, 0 };	/* pending line */
	const unsigned char *src;
	char tag[1024];
	size_t taglen = 0;
	size_t wrap, i, rb;
	int nest = 0;		/* Bracket nesting level */
	int done_reading = 0;
	int did_out;
	int ch;
	char c;
	char *result = NULL;

	if (inputmsg == NULL) return NULL;

	/* Usable line width; never less than one column */
	wrap = (screenwidth > 2) ? (size_t)(screenwidth - 2) : 1;

	tag[0] = 0;
	h2a_append(&line, "", 0);	/* make line.buf a valid empty string */
	if (line.failed) return NULL;

	/* Read through unsigned char so bytes >= 0x80 are not mistaken
	 * for the end of the message on signed-char platforms. */
	src = (const unsigned char *) inputmsg;

	do {
		ch = *src;
		if (ch == 0) {
			done_reading = 1;
		}
		else {
			++src;

			/* Fold in all the spacing */
			if ((ch == 10) || (ch == 13) || (ch == 9)) ch = 32;

			if ((ch >= 32) && (ch <= 126)) {
				if (ch == '<') {
					++nest;
					taglen = 0;
					tag[0] = 0;
				}
				else if (ch == '>') {
					if (nest > 0) --nest;
					h2a_emit_tag_break(&line, tag, wrap);
				}
				else if (nest > 0) {
					/* Inside a tag: collect its name,
					 * silently truncating if too long */
					if (taglen < (sizeof(tag) - 1)) {
						tag[taglen++] = (char) ch;
						tag[taglen] = 0;
					}
				}
				else {
					c = (char) ch;
					h2a_append(&line, &c, 1);
					/* An entity can only become complete
					 * when its ';' arrives */
					if (ch == ';')
						h2a_decode_tail_entity(&line);
				}
			}
		}

		/* Output any lines terminated with hard line breaks.  After
		 * a line is written the scan resumes at index 1 of what is
		 * left; a newline now sitting at index 0 is picked up by the
		 * next pass of the outer loop. */
		do {
			did_out = 0;
			for (i = 0; (i < line.len) && (i < wrap); ++i) {
				if (line.buf[i] == '\n') {
					h2a_append(&out, line.buf, i + 1);
					h2a_append(&out, "\n", 1);
					if (do_citaformat)
						h2a_append(&out, " ", 1);
					h2a_consume(&line, i + 1);
					i = 0;
					did_out = 1;
				}
			}
		} while (did_out);

		/* Add soft line breaks (at most one per input byte) */
		if (line.len > wrap) {
			rb = wrap;		/* wrap means "no space found" */
			for (i = 0; i < wrap; ++i) {
				if (line.buf[i] == 32) rb = i;
			}
			if (rb < wrap) {
				/* Break at the last space; the space is dropped */
				h2a_append(&out, line.buf, rb);
				h2a_append(&out, "\n", 1);
				if (do_citaformat)
					h2a_append(&out, " ", 1);
				h2a_consume(&line, rb + 1);
			} else {
				/* No space to break at: cut at the wrap width */
				h2a_append(&out, line.buf, wrap);
				h2a_append(&out, "\n", 1);
				if (do_citaformat)
					h2a_append(&out, " ", 1);
				h2a_consume(&line, wrap);
			}
		}

		if (out.failed || line.failed) break;

	} while (done_reading == 0);

	/* Whatever is still pending becomes the last line */
	h2a_append(&out, line.buf, line.len);
	h2a_append(&out, "\n", 1);

	if (!out.failed && !line.failed) {
		/* Hand the text back in a mallok() block, which is what this
		 * function has always allocated its result with. */
		result = mallok(out.len + 1);
		if (result != NULL)
			memcpy(result, out.buf, out.len + 1);
	}

	free(out.buf);
	free(line.buf);
	return result;

}
272