fix all the <time.h> vs. <sys/time.h> issues, hopefully
[citadel.git] / citadel / html.c
1 /*
2  * $Id$
3  *
4  * Functions which handle translation between HTML and plain text
5  */
6
7 #include "sysdep.h"
8 #include <stdlib.h>
9 #include <unistd.h>
10 #include <stdio.h>
11 #include <fcntl.h>
12 #include <signal.h>
13
14 #if TIME_WITH_SYS_TIME
15 # include <sys/time.h>
16 # include <time.h>
17 #else
18 # if HAVE_SYS_TIME_H
19 #  include <sys/time.h>
20 # else
21 #  include <time.h>
22 # endif
23 #endif
24
25 #include <ctype.h>
26 #include <string.h>
27 #include <errno.h>
28 #include <limits.h>
29 #include <syslog.h>
30 #include "citadel.h"
31 #include "server.h"
32 #include "control.h"
33 #include "sysdep_decls.h"
34 #include "support.h"
35 #include "config.h"
36 #include "msgbase.h"
37 #include "tools.h"
38 #include "room_ops.h"
39 #include "html.h"
40  
41
42 /*
43  * Convert HTML to plain text.
44  *
45  * inputmsg      = pointer to raw HTML message
46  * screenwidth   = desired output screenwidth
47  * do_citaformat = set to 1 to indent newlines with spaces
48  */
49 char *html_to_ascii(char *inputmsg, int screenwidth, int do_citaformat) {
50         char inbuf[SIZ];
51         char outbuf[SIZ];
52         char tag[1024];
53         int done_reading = 0;
54         char *inptr;
55         char *outptr;
56         size_t outlen;
57         int i, j, ch, did_out, rb;
58         int nest = 0;           /* Bracket nesting level */
59
60         inptr = inputmsg;
61         strcpy(inbuf, "");
62         strcpy(outbuf, "");
63
64         outptr = mallok(strlen(inptr) + SIZ);
65         if (outptr == NULL) return NULL;
66         strcpy(outptr, "");
67         outlen = 0;
68
69         do {
70                 /* Fill the input buffer */
71                 if ( (done_reading == 0) && (strlen(inbuf) < 128) ) {
72
73                         ch = *inputmsg++;
74                         if (ch > 0) {
75                                 inbuf[strlen(inbuf)+1] = 0;
76                                 inbuf[strlen(inbuf)] = ch;
77                         } 
78                         else {
79                                 done_reading = 1;
80                         }
81
82                 }
83
84                 /* Do some parsing */
85                 if (strlen(inbuf)>0) {
86
87                     /* Fold in all the spacing */
88                     for (i=0; i<strlen(inbuf); ++i) {
89                         if (inbuf[i]==10) inbuf[i]=32;
90                         if (inbuf[i]==13) inbuf[i]=32;
91                         if (inbuf[i]==9) inbuf[i]=32;
92                         if ((inbuf[i]<32) || (inbuf[i]>126))
93                                 strcpy(&inbuf[i], &inbuf[i+1]);
94                         while ((inbuf[i]==32)&&(inbuf[i+1]==32))
95                                 strcpy(&inbuf[i], &inbuf[i+1]);
96                     }
97
98                     for (i=0; i<strlen(inbuf); ++i) {
99
100                         ch = inbuf[i];
101
102                         if (ch == '<') {
103                                 ++nest;
104                                 strcpy(tag, "");
105                         }
106
107                         else if (ch == '>') {
108                                 if (nest > 0) --nest;
109                                 
110                                 if (!strcasecmp(tag, "P")) {
111                                         strcat(outbuf, "\n\n");
112                                 }
113
114                                 if (!strcasecmp(tag, "/DIV")) {
115                                         strcat(outbuf, "\n\n");
116                                 }
117
118                                 else if (!strcasecmp(tag, "H1")) {
119                                         strcat(outbuf, "\n\n");
120                                 }
121
122                                 else if (!strcasecmp(tag, "H2")) {
123                                         strcat(outbuf, "\n\n");
124                                 }
125
126                                 else if (!strcasecmp(tag, "H3")) {
127                                         strcat(outbuf, "\n\n");
128                                 }
129
130                                 else if (!strcasecmp(tag, "H4")) {
131                                         strcat(outbuf, "\n\n");
132                                 }
133
134                                 else if (!strcasecmp(tag, "/H1")) {
135                                         strcat(outbuf, "\n");
136                                 }
137
138                                 else if (!strcasecmp(tag, "/H2")) {
139                                         strcat(outbuf, "\n");
140                                 }
141
142                                 else if (!strcasecmp(tag, "/H3")) {
143                                         strcat(outbuf, "\n");
144                                 }
145
146                                 else if (!strcasecmp(tag, "/H4")) {
147                                         strcat(outbuf, "\n");
148                                 }
149
150                                 else if (!strcasecmp(tag, "HR")) {
151                                         strcat(outbuf, "\n ");
152                                         for (j=0; j<screenwidth-2; ++j)
153                                                 strcat(outbuf, "-");
154                                         strcat(outbuf, "\n");
155                                 }
156
157                                 else if (!strcasecmp(tag, "BR")) {
158                                         strcat(outbuf, "\n");
159                                 }
160
161                                 else if (!strcasecmp(tag, "TR")) {
162                                         strcat(outbuf, "\n");
163                                 }
164
165                                 else if (!strcasecmp(tag, "/TABLE")) {
166                                         strcat(outbuf, "\n");
167                                 }
168
169                         }
170
171                         else if ((nest > 0) && (strlen(tag)<(sizeof(tag)-1))) {
172                                 tag[strlen(tag)+1] = 0;
173                                 tag[strlen(tag)] = ch;
174                         }
175                                 
176                         else if (!nest) {
177                                 outbuf[strlen(outbuf)+1] = 0;
178                                 outbuf[strlen(outbuf)] = ch;
179                         }
180                     }
181                     strcpy(inbuf, &inbuf[i]);
182                 }
183
184                 /* Convert &; tags to the forbidden characters */
185                 if (strlen(outbuf)>0) for (i=0; i<strlen(outbuf); ++i) {
186
187                         if (!strncasecmp(&outbuf[i], "&nbsp;", 6)) {
188                                 outbuf[i] = ' ';
189                                 strcpy(&outbuf[i+1], &outbuf[i+6]);
190                         }
191
192                         else if (!strncasecmp(&outbuf[i], "&lt;", 4)) {
193                                 outbuf[i] = '<';
194                                 strcpy(&outbuf[i+1], &outbuf[i+4]);
195                         }
196
197                         else if (!strncasecmp(&outbuf[i], "&gt;", 4)) {
198                                 outbuf[i] = '>';
199                                 strcpy(&outbuf[i+1], &outbuf[i+4]);
200                         }
201
202                         else if (!strncasecmp(&outbuf[i], "&amp;", 5)) {
203                                 strcpy(&outbuf[i+1], &outbuf[i+5]);
204                         }
205
206                         else if (!strncasecmp(&outbuf[i], "&quot;", 6)) {
207                                 outbuf[i] = '\"';
208                                 strcpy(&outbuf[i+1], &outbuf[i+6]);
209                         }
210
211                         else if (!strncasecmp(&outbuf[i], "&copy;", 6)) {
212                                 outbuf[i] = '(';
213                                 outbuf[i+1] = 'c';
214                                 outbuf[i+2] = ')';
215                                 strcpy(&outbuf[i+3], &outbuf[i+6]);
216                         }
217
218                         else if (!strncasecmp(&outbuf[i], "&reg;", 5)) {
219                                 outbuf[i] = '(';
220                                 outbuf[i+1] = 'r';
221                                 outbuf[i+2] = ')';
222                                 strcpy(&outbuf[i+3], &outbuf[i+5]);
223                         }
224
225                 }
226
227                 /* Make sure the output buffer is big enough */
228                 if ((strlen(outptr) + strlen(outbuf) + 128) > outlen) {
229                         outlen += 128;
230                         outptr = realloc(outptr, outlen);
231                 }
232
233                 /* Output any lines terminated with hard line breaks */
234                 do {
235                         did_out = 0;
236                         if (strlen(outbuf)>0)
237                             for (i = 0; i<strlen(outbuf); ++i) {
238                                 if ( (i<(screenwidth-2)) && (outbuf[i]=='\n')) {
239                                         strncat(outptr, outbuf, i+1);
240                                         strcat(outptr, "\n");
241                                         if (do_citaformat)
242                                                 strcat(outptr, " ");
243                                         strcpy(outbuf, &outbuf[i+1]);
244                                         i = 0;
245                                         did_out = 1;
246                                 }
247                         }
248                 } while (did_out);
249
250                 /* Add soft line breaks */
251                 if (strlen(outbuf) > (screenwidth - 2)) {
252                         rb = (-1);
253                         for (i=0; i<(screenwidth-2); ++i) {
254                                 if (outbuf[i]==32) rb = i;
255                         }
256                         if (rb>=0) {
257                                 strncat(outptr, outbuf, rb);
258                                 strcat(outptr, "\n");
259                                 if (do_citaformat)
260                                         strcat(outptr, " ");
261                                 strcpy(outbuf, &outbuf[rb+1]);
262                         } else {
263
264                                 strncat(outptr, outbuf, screenwidth-2);
265                                 strcat(outptr, "\n");
266                                 if (do_citaformat)
267                                         strcat(outptr, " ");
268                                 strcpy(outbuf, &outbuf[screenwidth-2]);
269                         }
270                 }
271
272         } while (done_reading == 0);
273
274         strcat(outptr, outbuf);
275         strcat(outptr, "\n");
276
277         return outptr;
278
279 }
280