- port to Cygwin (DLL support, etc.)
[citadel.git] / citadel / html.c
1 /*
2  * $Id$
3  *
4  * Functions which handle translation between HTML and plain text
5  */
6
7 #ifdef DLL_EXPORT
8 #define IN_LIBCIT
9 #endif
10
11 #include "sysdep.h"
12 #include <stdlib.h>
13 #include <unistd.h>
14 #include <stdio.h>
15 #include <fcntl.h>
16 #include <signal.h>
17
18 #if TIME_WITH_SYS_TIME
19 # include <sys/time.h>
20 # include <time.h>
21 #else
22 # if HAVE_SYS_TIME_H
23 #  include <sys/time.h>
24 # else
25 #  include <time.h>
26 # endif
27 #endif
28
29 #include <ctype.h>
30 #include <string.h>
31 #include <errno.h>
32 #include <limits.h>
33 #include <syslog.h>
34 #include "citadel.h"
35 #include "server.h"
36 #include "dynloader.h"
37 #include "control.h"
38 #include "sysdep_decls.h"
39 #include "support.h"
40 #include "config.h"
41 #include "msgbase.h"
42 #include "tools.h"
43 #include "room_ops.h"
44 #include "html.h"
45  
46
/*
 * Growable, always NUL-terminated character buffer used by html_to_ascii().
 */
struct h2a_buf {
        char *data;             /* heap storage (NULL until the first reserve) */
        size_t len;             /* bytes in use, not counting the final NUL */
        size_t cap;             /* bytes allocated for data */
};

/*
 * One "&name;" entity and the plain text that replaces it.
 */
struct h2a_entity {
        const char *name;
        const char *text;
};

static const struct h2a_entity h2a_entities[] = {
        { "&nbsp;", " "   },
        { "&lt;",   "<"   },
        { "&gt;",   ">"   },
        { "&amp;",  "&"   },
        { "&quot;", "\""  },
        { "&copy;", "(c)" },
        { "&reg;",  "(r)" }
};

/*
 * Make room for `extra' more bytes plus the terminating NUL.
 * Returns 0 on success, -1 on overflow or allocation failure (in which
 * case the buffer is left untouched and still valid).
 */
static int h2a_reserve(struct h2a_buf *b, size_t extra) {
        size_t need;
        size_t newcap;
        char *tmp;

        if (extra >= (size_t)-1 - b->len) return -1;
        need = b->len + extra + 1;
        if (need <= b->cap) return 0;

        newcap = (b->cap > 0) ? b->cap : 128;
        while (newcap < need) {
                if (newcap > (size_t)-1 / 2) {
                        newcap = need;
                        break;
                }
                newcap *= 2;
        }

        /* Keep the old pointer until we know the resize worked */
        tmp = realloc(b->data, newcap);
        if (tmp == NULL) return -1;
        if (b->data == NULL) tmp[0] = 0;
        b->data = tmp;
        b->cap = newcap;
        return 0;
}

/*
 * Append `n' bytes from `src' (which must not point into the buffer itself).
 */
static int h2a_append(struct h2a_buf *b, const char *src, size_t n) {
        if (h2a_reserve(b, n) != 0) return -1;
        memcpy(b->data + b->len, src, n);
        b->len += n;
        b->data[b->len] = 0;
        return 0;
}

/*
 * Append `n' copies of the character `c'.
 */
static int h2a_fill(struct h2a_buf *b, int c, size_t n) {
        if (h2a_reserve(b, n) != 0) return -1;
        memset(b->data + b->len, c, n);
        b->len += n;
        b->data[b->len] = 0;
        return 0;
}

/*
 * Discard the first `n' bytes of the buffer.  The regions overlap, so this
 * has to be memmove() rather than strcpy()/memcpy().
 */
static void h2a_consume(struct h2a_buf *b, size_t n) {
        if (n > b->len) n = b->len;
        memmove(b->data, b->data + n, b->len - n + 1);
        b->len -= n;
}

/*
 * Append to `line' the line breaks (and, for HR, the rule of `width'
 * dashes) that the tag named `tag' stands for.  Unknown tags add nothing.
 */
static int h2a_emit_tag(struct h2a_buf *line, const char *tag, size_t width) {
        static const char *const double_break[] = {
                "P", "/DIV", "H1", "H2", "H3", "H4"
        };
        static const char *const single_break[] = {
                "/H1", "/H2", "/H3", "/H4", "BR", "TR", "/TABLE"
        };
        size_t k;

        for (k = 0; k < sizeof double_break / sizeof double_break[0]; ++k) {
                if (!strcasecmp(tag, double_break[k]))
                        return h2a_append(line, "\n\n", 2);
        }

        for (k = 0; k < sizeof single_break / sizeof single_break[0]; ++k) {
                if (!strcasecmp(tag, single_break[k]))
                        return h2a_append(line, "\n", 1);
        }

        if (!strcasecmp(tag, "HR")) {
                if (h2a_append(line, "\n ", 2) != 0) return -1;
                if (h2a_fill(line, '-', width) != 0) return -1;
                return h2a_append(line, "\n", 1);
        }

        return 0;
}

/*
 * If `line' ends with a known entity that starts at or after `raw_start',
 * replace it with its plain text.  Text in front of `raw_start' has already
 * been decoded and must not be looked at again, otherwise "&amp;lt;" would
 * wrongly turn into "<".
 *
 * Returns 1 if an entity was replaced, 0 if not, -1 on allocation failure.
 */
static int h2a_decode_tail(struct h2a_buf *line, size_t raw_start) {
        size_t k;

        for (k = 0; k < sizeof h2a_entities / sizeof h2a_entities[0]; ++k) {
                const char *name = h2a_entities[k].name;
                const char *text = h2a_entities[k].text;
                size_t namelen = strlen(name);

                if ((line->len - raw_start) < namelen) continue;
                if (strncasecmp(line->data + line->len - namelen,
                                name, namelen) != 0) continue;

                line->len -= namelen;
                line->data[line->len] = 0;
                if (h2a_append(line, text, strlen(text)) != 0) return -1;
                return 1;
        }
        return 0;
}

/*
 * Copy the first `count' bytes of `line' to `out', terminate the output
 * line (adding the one-space indent when do_citaformat is set), then drop
 * the first `drop' bytes from `line'.  `*raw_start' is shifted to match.
 */
static int h2a_emit_line(struct h2a_buf *out, struct h2a_buf *line,
                         size_t count, size_t drop,
                         int do_citaformat, size_t *raw_start) {
        if (h2a_append(out, line->data, count) != 0) return -1;
        if (h2a_append(out, "\n", 1) != 0) return -1;
        if (do_citaformat) {
                if (h2a_append(out, " ", 1) != 0) return -1;
        }
        h2a_consume(line, drop);
        *raw_start = (*raw_start > drop) ? (*raw_start - drop) : 0;
        return 0;
}

/*
 * Convert HTML to plain text.
 *
 * inputmsg      = pointer to raw HTML message (NUL-terminated)
 * screenwidth   = desired output screenwidth; lines are wrapped at
 *                 screenwidth-2 columns.  Values below 3 leave no room
 *                 for text, so no wrapping is done at all.
 * do_citaformat = set to 1 to indent newlines with spaces
 *
 * Returns a newly allocated, NUL-terminated string owned by the caller,
 * or NULL if inputmsg is NULL or memory runs out.  The storage comes from
 * the C library allocator (realloc).
 *
 * Processing, one input byte at a time:
 *  - CR, LF and TAB become blanks, runs of blanks collapse to one, and
 *    bytes outside printable ASCII (32..126) are dropped.  Bytes >= 0x80
 *    are dropped too; they do not end the conversion.
 *  - Text between '<' and '>' is collected as a tag name (compared
 *    without regard to case, attributes are not parsed) and a handful of
 *    tags are turned into line breaks.  A '>' outside of any tag is
 *    ordinary text.
 *  - &nbsp; &lt; &gt; &amp; &quot; &copy; &reg; are decoded exactly once.
 *  - Every hard line break written to the result is followed by one
 *    additional newline (plus the indent blank in citaformat mode); lines
 *    that grow past screenwidth-2 are broken at the last blank, or
 *    mid-word if there is none.
 */
char *html_to_ascii(char *inputmsg, int screenwidth, int do_citaformat) {
        struct h2a_buf out = { NULL, 0, 0 };    /* finished output */
        struct h2a_buf line = { NULL, 0, 0 };   /* text not yet wrapped */
        char tag[1024];
        size_t taglen = 0;
        size_t nest = 0;                /* Bracket nesting level */
        size_t raw_start = 0;           /* first undecoded byte in line */
        size_t width;
        size_t i;
        int prev_space = 0;             /* last kept input byte was a blank */
        int done_reading = 0;
        int did_out;
        int rc;
        unsigned char ch;

        if (inputmsg == NULL) return NULL;

        /* Computed once, without ever forming a negative column count */
        width = (screenwidth > 2) ? (size_t)(screenwidth - 2) : 0;
        tag[0] = 0;

        if (h2a_reserve(&out, strlen(inputmsg) + 128) != 0) goto fail;
        if (h2a_reserve(&line, 128) != 0) goto fail;

        do {
                /* Fetch the next byte; only a real NUL ends the input */
                ch = (unsigned char) *inputmsg;
                if (ch == 0) {
                        done_reading = 1;
                }
                else {
                        ++inputmsg;

                        /* Fold in all the spacing */
                        if ((ch == '\n') || (ch == '\r') || (ch == '\t'))
                                ch = ' ';

                        if ((ch < 32) || (ch > 126)) {
                                /* not printable ASCII: dropped */
                        }
                        else if ((ch == ' ') && prev_space) {
                                /* second blank in a row: dropped */
                        }
                        else {
                                prev_space = (ch == ' ');

                                if (ch == '<') {
                                        ++nest;
                                        taglen = 0;
                                        tag[0] = 0;
                                }
                                else if ((ch == '>') && (nest > 0)) {
                                        --nest;
                                        if (h2a_emit_tag(&line, tag, width) != 0)
                                                goto fail;
                                }
                                else if (nest > 0) {
                                        /* Overlong tag names are truncated */
                                        if (taglen < (sizeof(tag) - 1)) {
                                                tag[taglen++] = (char) ch;
                                                tag[taglen] = 0;
                                        }
                                }
                                else {
                                        char c = (char) ch;

                                        if (h2a_append(&line, &c, 1) != 0)
                                                goto fail;

                                        /* An entity can only end on ';' */
                                        if (ch == ';') {
                                                rc = h2a_decode_tail(&line,
                                                                raw_start);
                                                if (rc < 0) goto fail;
                                                if (rc > 0)
                                                        raw_start = line.len;
                                        }
                                }
                        }
                }

                if (width > 0) {

                        /* Output any lines terminated with hard line breaks.
                         * After a flush the scan resumes at index 1 and the
                         * whole scan is repeated until nothing is flushed.
                         */
                        do {
                                did_out = 0;
                                for (i = 0; (i < line.len) && (i < width); ++i) {
                                        if (line.data[i] == '\n') {
                                                if (h2a_emit_line(&out, &line,
                                                    i + 1, i + 1,
                                                    do_citaformat,
                                                    &raw_start) != 0)
                                                        goto fail;
                                                i = 0;
                                                did_out = 1;
                                        }
                                }
                        } while (did_out);

                        /* Add soft line breaks (at most one per pass) */
                        if (line.len > width) {
                                size_t count = width;   /* no blank: cut here */
                                size_t drop = width;

                                for (i = width; i > 0; --i) {
                                        if (line.data[i - 1] == ' ') {
                                                /* break at the blank, eat it */
                                                count = i - 1;
                                                drop = i;
                                                break;
                                        }
                                }
                                if (h2a_emit_line(&out, &line, count, drop,
                                    do_citaformat, &raw_start) != 0)
                                        goto fail;
                        }
                }

        } while (done_reading == 0);

        /* Whatever is left becomes the final line */
        if (h2a_append(&out, line.data, line.len) != 0) goto fail;
        if (h2a_append(&out, "\n", 1) != 0) goto fail;

        free(line.data);
        return out.data;

fail:
        free(line.data);
        free(out.data);
        return NULL;
}
285