* Removed all of the thread cancellation cruft that is no longer necessary
[citadel.git] / citadel / html.c
1 /*
2  * html.c -- Functions which handle translation between HTML and plain text
3  * $Id$
4  */
5
6 #include "sysdep.h"
7 #include <stdlib.h>
8 #include <unistd.h>
9 #include <stdio.h>
10 #include <fcntl.h>
11 #include <signal.h>
12 #include <time.h>
13 #include <ctype.h>
14 #include <string.h>
15 #include <errno.h>
16 #include <limits.h>
17 #include <syslog.h>
18 #include "citadel.h"
19 #include "server.h"
20 #include "control.h"
21 #include "sysdep_decls.h"
22 #include "support.h"
23 #include "config.h"
24 #include "msgbase.h"
25 #include "tools.h"
26 #include "room_ops.h"
27 #include "html.h"
28  
29
/*
 * Text being built by html_to_ascii().
 *
 * buf[0 .. line) holds finished output lines; buf[line .. len) is the line
 * still being assembled, which may yet be decoded, broken or wrapped.
 */
struct h2a_text {
	char *buf;	/* NUL-terminated text */
	size_t len;	/* strlen(buf) */
	size_t cap;	/* bytes allocated for buf; always greater than len */
	size_t line;	/* offset of the line still being assembled */
};


/*
 * Replace the `cut` bytes at offset `pos` with the `n` bytes at `src`,
 * growing the buffer when required.  `src` must not point into t->buf.
 *
 * Returns 0 on success, or -1 if the buffer could not be grown (in which
 * case the text is left exactly as it was).
 */
static int h2a_splice(struct h2a_text *t, size_t pos, size_t cut,
		      const char *src, size_t n)
{
	size_t tail = t->len - pos - cut;	/* bytes following the cut */
	size_t need = t->len - cut + n + 1;	/* new length plus the NUL */
	size_t newcap;
	char *bigger;

	if (need > t->cap) {
		newcap = t->cap * 2;
		if (newcap < need) newcap = need;
		bigger = realloc(t->buf, newcap);
		if (bigger == NULL) return -1;
		t->buf = bigger;
		t->cap = newcap;
	}

	/* The regions overlap, so memmove() is required; +1 carries the NUL */
	memmove(t->buf + pos + n, t->buf + pos + cut, tail + 1);
	memcpy(t->buf + pos, src, n);
	t->len = t->len - cut + n;
	return 0;
}


/*
 * Append `n` bytes to the line being assembled.  Returns 0, or -1 when out
 * of memory.
 */
static int h2a_append(struct h2a_text *t, const char *src, size_t n)
{
	return h2a_splice(t, t->len, 0, src, n);
}


/*
 * Emit the line breaks implied by a tag that has just been closed by '>'.
 * The tag text is compared as a whole (ignoring case), so a tag carrying
 * attributes matches nothing.  `width` is the number of dashes drawn for
 * <HR>.  Returns 0, or -1 when out of memory.
 */
static int h2a_close_tag(struct h2a_text *t, const char *tag, size_t width)
{
	static const struct {
		const char *name;
		const char *text;
	} breaks[] = {
		{ "P", "\n\n" },   { "/DIV", "\n\n" },
		{ "H1", "\n\n" },  { "H2", "\n\n" },
		{ "H3", "\n\n" },  { "H4", "\n\n" },
		{ "/H1", "\n" },   { "/H2", "\n" },
		{ "/H3", "\n" },   { "/H4", "\n" },
		{ "BR", "\n" },    { "TR", "\n" },
		{ "/TABLE", "\n" }
	};
	size_t k;

	if (!strcasecmp(tag, "HR")) {
		if (h2a_append(t, "\n ", 2) != 0) return -1;
		for (k = 0; k < width; ++k) {
			if (h2a_append(t, "-", 1) != 0) return -1;
		}
		return h2a_append(t, "\n", 1);
	}

	for (k = 0; k < sizeof breaks / sizeof breaks[0]; ++k) {
		if (!strcasecmp(tag, breaks[k].name)) {
			return h2a_append(t, breaks[k].text,
					  strlen(breaks[k].text));
		}
	}
	return 0;
}


/*
 * Called right after a ';' has been appended: if the line being assembled
 * now ends with one of the known &...; entities (any case), replace the
 * entity with its plain-text form.
 *
 * Decoded text is not protected from being matched again, so "&amp;lt;"
 * ends up as "<".  That matches what this function has always produced.
 */
static int h2a_decode_entity(struct h2a_text *t)
{
	static const struct {
		const char *code;
		const char *text;
	} entities[] = {
		{ "&nbsp;", " " },
		{ "&lt;", "<" },
		{ "&gt;", ">" },
		{ "&amp;", "&" },
		{ "&quot;", "\"" },
		{ "&copy;", "(c)" },
		{ "&reg;", "(r)" }
	};
	size_t pending = t->len - t->line;
	size_t k, n;

	for (k = 0; k < sizeof entities / sizeof entities[0]; ++k) {
		n = strlen(entities[k].code);
		if ((n <= pending)
		   && !strncasecmp(t->buf + t->len - n, entities[k].code, n)) {
			/* Every replacement is shorter, so this never grows */
			return h2a_splice(t, t->len - n, n, entities[k].text,
					  strlen(entities[k].text));
		}
	}
	return 0;
}


/*
 * Finish every line that ends with a hard line break ('\n') within the
 * first `width` characters of the line being assembled.  Each finished line
 * keeps its own '\n' and is followed by `brk`.  Returns 0, or -1 when out
 * of memory.
 */
static int h2a_flush_lines(struct h2a_text *t, size_t width,
			   const char *brk, size_t brklen)
{
	int flushed;
	size_t i;

	do {
		flushed = 0;
		for (i = 0; i < (t->len - t->line); ++i) {
			if ((i < width) && (t->buf[t->line + i] == '\n')) {
				if (h2a_splice(t, t->line + i + 1, 0,
					       brk, brklen) != 0) {
					return -1;
				}
				t->line += i + 1 + brklen;
				/*
				 * The scan resumes at index 1 of the new
				 * line (the ++i follows); a newline sitting
				 * at index 0 is picked up by the next pass.
				 * Kept so the output stays unchanged.
				 */
				i = 0;
				flushed = 1;
			}
		}
	} while (flushed);
	return 0;
}


/*
 * Add at most one soft line break: when the line being assembled is longer
 * than `width`, break it at the last space among its first `width`
 * characters (the space itself is dropped), or after exactly `width`
 * characters when there is no such space.  Returns 0, or -1 when out of
 * memory.
 */
static int h2a_wrap_line(struct h2a_text *t, size_t width,
			 const char *brk, size_t brklen)
{
	size_t cut = width;
	size_t drop = 0;
	size_t i;

	if ((t->len - t->line) <= width) return 0;

	for (i = 0; i < width; ++i) {
		if (t->buf[t->line + i] == 32) {
			cut = i;
			drop = 1;
		}
	}

	if (h2a_splice(t, t->line + cut, drop, brk, brklen) != 0) return -1;
	t->line += cut + brklen;
	return 0;
}


/*
 * Convert HTML to plain text.
 *
 * inputmsg      = pointer to raw HTML message (NUL-terminated)
 * screenwidth   = desired output screenwidth; lines are wrapped at
 *                 screenwidth-2 characters.  Values below 3 are treated
 *                 as 3 so that wrapping always makes progress.
 * do_citaformat = set to 1 to indent newlines with spaces
 *
 * Returns a newly allocated, NUL-terminated string which always ends with
 * a newline, or NULL if inputmsg is NULL or memory runs out.  No reference
 * to the returned buffer is kept here.
 *
 * What the conversion does:
 *  - tab, CR and LF become a space; every other byte outside 32..126 is
 *    dropped;
 *  - everything between '<' and '>' is removed; P, /DIV, H1-H4, /H1-/H4,
 *    HR, BR, TR and /TABLE additionally produce line breaks;
 *  - &nbsp; &lt; &gt; &amp; &quot; &copy; &reg; are decoded;
 *  - a '>' outside any tag is dropped and repeats the breaks of the most
 *    recent tag, because the tag text is only reset by '<'.
 *
 * NOTE(review): mallok() is declared outside this file.  Like the realloc()
 * call this function has always made, the code assumes its result may be
 * passed to realloc()/free() -- confirm against its definition.
 */
char *html_to_ascii(char *inputmsg, int screenwidth, int do_citaformat) {
	struct h2a_text out;
	char tag[1024];
	size_t taglen = 0;
	const unsigned char *in;
	const char *brk;
	size_t brklen;
	size_t width;
	int nest = 0;		/* Bracket nesting level */
	int done_reading = 0;
	int ch;
	char one;

	if (inputmsg == NULL) return NULL;

	width = (screenwidth > 2) ? (size_t) (screenwidth - 2) : 1;
	brk = do_citaformat ? "\n " : "\n";
	brklen = strlen(brk);

	out.cap = strlen(inputmsg) + 256;
	out.buf = mallok(out.cap);
	if (out.buf == NULL) return NULL;
	out.buf[0] = 0;
	out.len = 0;
	out.line = 0;

	/* A '>' may arrive before any '<', so the tag must start out empty */
	tag[0] = 0;

	/*
	 * Read through unsigned char so that bytes >= 0x80 are not mistaken
	 * for the terminator where plain char is signed.
	 */
	in = (const unsigned char *) inputmsg;

	do {
		ch = *in++;

		if (ch == 0) {
			done_reading = 1;
		}
		else {
			/* Fold in all the spacing */
			if ((ch == 10) || (ch == 13) || (ch == 9)) ch = 32;

			if ((ch < 32) || (ch > 126)) {
				/* not printable ASCII: dropped */
			}
			else if (ch == '<') {
				++nest;
				taglen = 0;
				tag[0] = 0;
			}
			else if (ch == '>') {
				if (nest > 0) --nest;
				if (h2a_close_tag(&out, tag, width) != 0) {
					goto fail;
				}
			}
			else if (nest > 0) {
				/* Inside a tag; overlong tags are truncated */
				if (taglen < (sizeof(tag) - 1)) {
					tag[taglen++] = (char) ch;
					tag[taglen] = 0;
				}
			}
			else {
				one = (char) ch;
				if (h2a_append(&out, &one, 1) != 0) goto fail;

				/* Convert &; tags to the forbidden characters */
				if ((ch == ';')
				   && (h2a_decode_entity(&out) != 0)) {
					goto fail;
				}
			}
		}

		/*
		 * These two steps also run once after the terminator has
		 * been read, exactly one soft break per input character.
		 */
		if (h2a_flush_lines(&out, width, brk, brklen) != 0) goto fail;
		if (h2a_wrap_line(&out, width, brk, brklen) != 0) goto fail;

	} while (done_reading == 0);

	if (h2a_append(&out, "\n", 1) != 0) goto fail;

	return out.buf;

fail:
	free(out.buf);
	return NULL;
}
269