]> code.citadel.org Git - citadel.git/blob - citadel/html.c
* Added MSG4 support to client-side IPC
[citadel.git] / citadel / html.c
1 /*
2  * $Id$
3  *
4  * Functions which handle translation between HTML and plain text
5  * Copyright (c) 2000-2001 by Art Cancro and others.   This program is
6  * released under the terms of the GNU General Public License.
7  */
8
9 #ifdef DLL_EXPORT
10 #define IN_LIBCIT
11 #endif
12
13 #include "sysdep.h"
14 #include <stdlib.h>
15 #include <unistd.h>
16 #include <stdio.h>
17 #include <fcntl.h>
18 #include <signal.h>
19
20 #if TIME_WITH_SYS_TIME
21 # include <sys/time.h>
22 # include <time.h>
23 #else
24 # if HAVE_SYS_TIME_H
25 #  include <sys/time.h>
26 # else
27 #  include <time.h>
28 # endif
29 #endif
30
31 #include <ctype.h>
32 #include <string.h>
33 #include <errno.h>
34 #include <limits.h>
35 #include <syslog.h>
36 #include "citadel.h"
37 #include "server.h"
38 #include "dynloader.h"
39 #include "control.h"
40 #include "sysdep_decls.h"
41 #include "support.h"
42 #include "config.h"
43 #include "msgbase.h"
44 #include "tools.h"
45 #include "room_ops.h"
46 #include "html.h"
47  
48
49 /*
50  * Convert HTML to plain text.
51  *
52  * inputmsg      = pointer to raw HTML message
53  * screenwidth   = desired output screenwidth
54  * do_citaformat = set to 1 to indent newlines with spaces
55  */
56 char *html_to_ascii(char *inputmsg, int screenwidth, int do_citaformat) {
57         char inbuf[SIZ];
58         char outbuf[SIZ];
59         char tag[1024];
60         int done_reading = 0;
61         char *inptr;
62         char *outptr;
63         size_t outlen;
64         int i, j, ch, did_out, rb;
65         int nest = 0;           /* Bracket nesting level */
66
67         inptr = inputmsg;
68         strcpy(inbuf, "");
69         strcpy(outbuf, "");
70
71         outptr = mallok(strlen(inptr) + SIZ);
72         if (outptr == NULL) return NULL;
73         strcpy(outptr, "");
74         outlen = 0;
75
76         do {
77                 /* Fill the input buffer */
78                 if ( (done_reading == 0) && (strlen(inbuf) < 128) ) {
79
80                         ch = *inputmsg++;
81                         if (ch > 0) {
82                                 inbuf[strlen(inbuf)+1] = 0;
83                                 inbuf[strlen(inbuf)] = ch;
84                         } 
85                         else {
86                                 done_reading = 1;
87                         }
88
89                 }
90
91                 /* Do some parsing */
92                 if (strlen(inbuf)>0) {
93
94                     /* Fold in all the spacing */
95                     for (i=0; i<strlen(inbuf); ++i) {
96                         if (inbuf[i]==10) inbuf[i]=32;
97                         if (inbuf[i]==13) inbuf[i]=32;
98                         if (inbuf[i]==9) inbuf[i]=32;
99                         if ((inbuf[i]<32) || (inbuf[i]>126))
100                                 strcpy(&inbuf[i], &inbuf[i+1]);
101                     }
102                     for (i=0; i<strlen(inbuf); ++i) {
103                         while ((inbuf[i]==32)&&(inbuf[i+1]==32))
104                                 strcpy(&inbuf[i], &inbuf[i+1]);
105                     }
106
107                     for (i=0; i<strlen(inbuf); ++i) {
108
109                         ch = inbuf[i];
110
111                         if (ch == '<') {
112                                 ++nest;
113                                 strcpy(tag, "");
114                         }
115
116                         else if (ch == '>') {
117                                 if (nest > 0) --nest;
118                                 
119                                 if (!strcasecmp(tag, "P")) {
120                                         strcat(outbuf, "\n\n");
121                                 }
122
123                                 if (!strcasecmp(tag, "/DIV")) {
124                                         strcat(outbuf, "\n\n");
125                                 }
126
127                                 else if (!strcasecmp(tag, "H1")) {
128                                         strcat(outbuf, "\n\n");
129                                 }
130
131                                 else if (!strcasecmp(tag, "H2")) {
132                                         strcat(outbuf, "\n\n");
133                                 }
134
135                                 else if (!strcasecmp(tag, "H3")) {
136                                         strcat(outbuf, "\n\n");
137                                 }
138
139                                 else if (!strcasecmp(tag, "H4")) {
140                                         strcat(outbuf, "\n\n");
141                                 }
142
143                                 else if (!strcasecmp(tag, "/H1")) {
144                                         strcat(outbuf, "\n");
145                                 }
146
147                                 else if (!strcasecmp(tag, "/H2")) {
148                                         strcat(outbuf, "\n");
149                                 }
150
151                                 else if (!strcasecmp(tag, "/H3")) {
152                                         strcat(outbuf, "\n");
153                                 }
154
155                                 else if (!strcasecmp(tag, "/H4")) {
156                                         strcat(outbuf, "\n");
157                                 }
158
159                                 else if (!strcasecmp(tag, "HR")) {
160                                         strcat(outbuf, "\n ");
161                                         for (j=0; j<screenwidth-2; ++j)
162                                                 strcat(outbuf, "-");
163                                         strcat(outbuf, "\n");
164                                 }
165
166                                 else if (!strcasecmp(tag, "BR")) {
167                                         strcat(outbuf, "\n");
168                                 }
169
170                                 else if (!strcasecmp(tag, "TR")) {
171                                         strcat(outbuf, "\n");
172                                 }
173
174                                 else if (!strcasecmp(tag, "/TABLE")) {
175                                         strcat(outbuf, "\n");
176                                 }
177
178                         }
179
180                         else if ((nest > 0) && (strlen(tag)<(sizeof(tag)-1))) {
181                                 tag[strlen(tag)+1] = 0;
182                                 tag[strlen(tag)] = ch;
183                         }
184                                 
185                         else if (!nest) {
186                                 outbuf[strlen(outbuf)+1] = 0;
187                                 outbuf[strlen(outbuf)] = ch;
188                         }
189                     }
190                     strcpy(inbuf, &inbuf[i]);
191                 }
192
193                 /* Convert &; tags to the forbidden characters */
194                 if (strlen(outbuf)>0) for (i=0; i<strlen(outbuf); ++i) {
195
196                         if (!strncasecmp(&outbuf[i], "&nbsp;", 6)) {
197                                 outbuf[i] = ' ';
198                                 strcpy(&outbuf[i+1], &outbuf[i+6]);
199                         }
200
201                         else if (!strncasecmp(&outbuf[i], "&lt;", 4)) {
202                                 outbuf[i] = '<';
203                                 strcpy(&outbuf[i+1], &outbuf[i+4]);
204                         }
205
206                         else if (!strncasecmp(&outbuf[i], "&gt;", 4)) {
207                                 outbuf[i] = '>';
208                                 strcpy(&outbuf[i+1], &outbuf[i+4]);
209                         }
210
211                         else if (!strncasecmp(&outbuf[i], "&amp;", 5)) {
212                                 strcpy(&outbuf[i+1], &outbuf[i+5]);
213                         }
214
215                         else if (!strncasecmp(&outbuf[i], "&quot;", 6)) {
216                                 outbuf[i] = '\"';
217                                 strcpy(&outbuf[i+1], &outbuf[i+6]);
218                         }
219
220                         else if (!strncasecmp(&outbuf[i], "&copy;", 6)) {
221                                 outbuf[i] = '(';
222                                 outbuf[i+1] = 'c';
223                                 outbuf[i+2] = ')';
224                                 strcpy(&outbuf[i+3], &outbuf[i+6]);
225                         }
226
227                         else if (!strncasecmp(&outbuf[i], "&reg;", 5)) {
228                                 outbuf[i] = '(';
229                                 outbuf[i+1] = 'r';
230                                 outbuf[i+2] = ')';
231                                 strcpy(&outbuf[i+3], &outbuf[i+5]);
232                         }
233
234                 }
235
236                 /* Make sure the output buffer is big enough */
237                 if ((strlen(outptr) + strlen(outbuf) + 128) > outlen) {
238                         outlen += 128;
239                         outptr = realloc(outptr, outlen);
240                 }
241
242                 /* Output any lines terminated with hard line breaks */
243                 do {
244                         did_out = 0;
245                         if (strlen(outbuf)>0)
246                             for (i = 0; i<strlen(outbuf); ++i) {
247                                 if ( (i<(screenwidth-2)) && (outbuf[i]=='\n')) {
248                                         strncat(outptr, outbuf, i+1);
249                                         /* strcat(outptr, "\n"); */
250                                         if (do_citaformat)
251                                                 strcat(outptr, " ");
252                                         strcpy(outbuf, &outbuf[i+1]);
253                                         i = 0;
254                                         did_out = 1;
255                                 }
256                         }
257                 } while (did_out);
258
259                 /* Add soft line breaks */
260                 if (strlen(outbuf) > (screenwidth - 2)) {
261                         rb = (-1);
262                         for (i=0; i<(screenwidth-2); ++i) {
263                                 if (outbuf[i]==32) rb = i;
264                         }
265                         if (rb>=0) {
266                                 strncat(outptr, outbuf, rb);
267                                 strcat(outptr, "\n");
268                                 if (do_citaformat)
269                                         strcat(outptr, " ");
270                                 strcpy(outbuf, &outbuf[rb+1]);
271                         } else {
272
273                                 strncat(outptr, outbuf, screenwidth-2);
274                                 strcat(outptr, "\n");
275                                 if (do_citaformat)
276                                         strcat(outptr, " ");
277                                 strcpy(outbuf, &outbuf[screenwidth-2]);
278                         }
279                 }
280
281         } while (done_reading == 0);
282
283         strcat(outptr, outbuf);
284         striplt(outptr);
285         if (outptr[strlen(outptr)-1] != '\n') strcat(outptr, "\n");
286
287         return outptr;
288
289 }