f638032f209d3cb98a71eb7a4f65fdf57145dc0d
[citadel.git] / citadel / html.c
1 /*
2  * html.c -- Functions which handle translation between HTML and plain text
3  * $Id$
4  */
5
6 #include "sysdep.h"
7 #include <stdlib.h>
8 #include <unistd.h>
9 #include <stdio.h>
10 #include <fcntl.h>
11 #include <signal.h>
12 #include <time.h>
13 #include <ctype.h>
14 #include <string.h>
15 #include <errno.h>
16 #include <limits.h>
17 #include <syslog.h>
18 #include "citadel.h"
19 #include "server.h"
20 #include "control.h"
21 #include "sysdep_decls.h"
22 #include "support.h"
23 #include "config.h"
24 #include "msgbase.h"
25 #include "tools.h"
26 #include "room_ops.h"
27 #include "html.h"
28  
29
/*
 * Growable, NUL-terminated character buffer used only by html_to_ascii()
 * and its helpers.  An unused buffer is { NULL, 0, 0 }.
 */
struct h2a_buf {
	char *data;	/* contents; data[len] is always 0 once allocated */
	size_t len;	/* number of characters, terminator not counted   */
	size_t cap;	/* bytes allocated for data                       */
};

/*
 * Make sure 'extra' more characters (plus the terminator) fit in buf.
 * Returns 0 on success, -1 if memory could not be obtained; on failure
 * the buffer is left untouched and still valid.
 */
static int h2a_reserve(struct h2a_buf *buf, size_t extra) {
	size_t needed;
	size_t newcap;
	char *grown;

	if (extra >= ((size_t)-1) - buf->len) {
		return -1;		/* len + extra + 1 would wrap around */
	}
	needed = buf->len + extra + 1;
	if (needed <= buf->cap) {
		return 0;
	}

	newcap = (buf->cap > 0) ? buf->cap : 128;
	while (newcap < needed) {
		if (newcap > ((size_t)-1) / 2) {
			newcap = needed;
			break;
		}
		newcap *= 2;
	}

	/* Keep the old pointer until realloc() is known to have succeeded */
	grown = realloc(buf->data, newcap);
	if (grown == NULL) {
		return -1;
	}
	if (buf->data == NULL) {
		grown[0] = 0;		/* fresh buffer starts as "" */
	}
	buf->data = grown;
	buf->cap = newcap;
	return 0;
}

/* Append n characters from src to buf.  Returns 0 on success, -1 on failure. */
static int h2a_append(struct h2a_buf *buf, const char *src, size_t n) {
	if (h2a_reserve(buf, n) != 0) {
		return -1;
	}
	memcpy(buf->data + buf->len, src, n);
	buf->len += n;
	buf->data[buf->len] = 0;
	return 0;
}

/* Remove the first n characters (n <= buf->len) from buf. */
static void h2a_drop_front(struct h2a_buf *buf, size_t n) {
	/* Source and destination overlap, so this must be memmove() */
	memmove(buf->data, buf->data + n, buf->len - n + 1);
	buf->len -= n;
}

/*
 * Copy the first n characters of text to out, followed by a newline and,
 * when do_citaformat is nonzero, a single space.
 * Returns 0 on success, -1 on failure.
 */
static int h2a_put_line(struct h2a_buf *out, const char *text, size_t n,
			int do_citaformat) {
	if (h2a_append(out, text, n) != 0) {
		return -1;
	}
	if (h2a_append(out, "\n", 1) != 0) {
		return -1;
	}
	if (do_citaformat) {
		return h2a_append(out, " ", 1);
	}
	return 0;
}

/*
 * Append to 'line' the text that stands in for the tag named 'tag'.
 * Names are compared without regard to case and must match exactly, so a
 * tag carrying attributes is not recognised.  Unknown tags add nothing.
 * rule_len is the number of dashes drawn for <HR>.
 * Returns 0 on success, -1 on failure.
 */
static int h2a_emit_tag(struct h2a_buf *line, const char *tag, size_t rule_len) {
	static const char *const two_newlines[] = {
		"P", "/DIV", "H1", "H2", "H3", "H4"
	};
	static const char *const one_newline[] = {
		"/H1", "/H2", "/H3", "/H4", "BR", "TR", "/TABLE"
	};
	size_t k;

	for (k = 0; k < sizeof(two_newlines) / sizeof(two_newlines[0]); ++k) {
		if (!strcasecmp(tag, two_newlines[k])) {
			return h2a_append(line, "\n\n", 2);
		}
	}

	for (k = 0; k < sizeof(one_newline) / sizeof(one_newline[0]); ++k) {
		if (!strcasecmp(tag, one_newline[k])) {
			return h2a_append(line, "\n", 1);
		}
	}

	if (!strcasecmp(tag, "HR")) {
		if (h2a_append(line, "\n ", 2) != 0) {
			return -1;
		}
		if (h2a_reserve(line, rule_len) != 0) {
			return -1;
		}
		memset(line->data + line->len, '-', rule_len);
		line->len += rule_len;
		line->data[line->len] = 0;
		return h2a_append(line, "\n", 1);
	}

	return 0;
}

/*
 * If 'line' ends with one of the supported &...; entities (matched without
 * regard to case), replace that entity in place with its plain text form.
 *
 * This is called right after a ';' has been appended.  Every entity name
 * ends in ';' and nothing else ever adds a ';' to the line, so an entity
 * can only have been completed at the very end of the line.
 */
static void h2a_decode_tail(struct h2a_buf *line) {
	static const struct {
		const char *name;
		const char *text;	/* never longer than name */
	} entities[] = {
		{ "&nbsp;", " "   },
		{ "&lt;",   "<"   },
		{ "&gt;",   ">"   },
		{ "&amp;",  "&"   },
		{ "&quot;", "\""  },
		{ "&copy;", "(c)" },
		{ "&reg;",  "(r)" }
	};
	size_t k;
	size_t namelen;
	size_t textlen;
	char *at;

	for (k = 0; k < sizeof(entities) / sizeof(entities[0]); ++k) {
		namelen = strlen(entities[k].name);
		if (line->len < namelen) {
			continue;
		}
		at = line->data + line->len - namelen;
		if (strncasecmp(at, entities[k].name, namelen) != 0) {
			continue;
		}
		textlen = strlen(entities[k].text);
		memcpy(at, entities[k].text, textlen);
		line->len -= (namelen - textlen);
		line->data[line->len] = 0;
		return;
	}
}

/*
 * Convert HTML to plain text.
 *
 * inputmsg      = pointer to raw HTML message (NUL-terminated)
 * screenwidth   = desired output screenwidth; text is wrapped at
 *                 screenwidth-2 columns.  A value of 2 or less turns
 *                 wrapping off: nothing is broken into lines and the
 *                 newlines produced by tags are passed through as they are.
 * do_citaformat = set to nonzero to put a space after each newline the
 *                 wrapping code writes
 *
 * Returns a NUL-terminated string obtained from mallok(), which the caller
 * owns, or NULL if inputmsg is NULL or memory could not be allocated.
 *
 * How the input is treated, one character at a time:
 *  - CR, LF and TAB each become a space; any other byte outside the
 *    printable ASCII range 32..126 is discarded.
 *  - '<' opens a tag (nesting is counted) and '>' closes one.  Characters
 *    inside a tag are collected as the tag name and are not output.
 *    A '>' is never output, even when no tag is open.
 *  - P, /DIV, H1-H4 add two newlines; /H1-/H4, BR, TR and /TABLE add one;
 *    HR adds a rule of screenwidth-2 dashes.  Other tags are dropped.
 *  - &nbsp; &lt; &gt; &amp; &quot; &copy; &reg; are decoded.
 *  - When wrapping is on, a line ended by a tag-generated newline is
 *    written followed by one more newline, and over-long lines are broken
 *    at the last space before the wrap column (or mid-word if there is no
 *    space).
 * The result always ends with a newline.
 */
char *html_to_ascii(char *inputmsg, int screenwidth, int do_citaformat) {
	struct h2a_buf line = { NULL, 0, 0 };	/* text not yet written out */
	struct h2a_buf out = { NULL, 0, 0 };	/* finished text            */
	const unsigned char *src;
	char tag[1024];
	size_t taglen = 0;
	size_t wrap;				/* wrap column; 0 = no wrapping */
	size_t i;
	size_t brk;
	char *result = NULL;
	char textch;
	int have_brk;
	int ch;
	int did_out;
	int done_reading = 0;
	int nest = 0;				/* Bracket nesting level */

	if (inputmsg == NULL) {
		return NULL;
	}

	wrap = (screenwidth > 2) ? (size_t)(screenwidth - 2) : 0;
	tag[0] = 0;	/* a '>' may arrive before any '<' has set the tag */

	/*
	 * Read bytes as unsigned so that a byte >= 0x80 is simply discarded
	 * below instead of looking like a negative end-of-input marker.
	 */
	src = (const unsigned char *)inputmsg;

	if (h2a_reserve(&line, 0) != 0) {
		goto cleanup;
	}
	if (h2a_reserve(&out, strlen(inputmsg) + 256) != 0) {
		goto cleanup;
	}

	do {
		/* Take one character of input and parse it */
		ch = *src;
		if (ch == 0) {
			done_reading = 1;
		}
		else {
			++src;
			if ((ch == '\n') || (ch == '\r') || (ch == '\t')) {
				ch = ' ';
			}
		}

		if ((ch >= 32) && (ch <= 126)) {

			if (ch == '<') {
				++nest;
				taglen = 0;
				tag[0] = 0;
			}

			else if (ch == '>') {
				if (nest > 0) --nest;
				if (h2a_emit_tag(&line, tag, wrap) != 0) {
					goto cleanup;
				}
				/*
				 * The tag has been acted upon; forget it so
				 * that a later stray '>' cannot repeat it.
				 */
				taglen = 0;
				tag[0] = 0;
			}

			else if (nest > 0) {
				/* Inside a tag; excess name characters are dropped */
				if (taglen < (sizeof(tag) - 1)) {
					tag[taglen++] = (char)ch;
					tag[taglen] = 0;
				}
			}

			else {
				textch = (char)ch;
				if (h2a_append(&line, &textch, 1) != 0) {
					goto cleanup;
				}
				if (ch == ';') {
					h2a_decode_tail(&line);
				}
			}
		}

		if (wrap > 0) {

			/*
			 * Output any lines terminated with hard line breaks.
			 * After a line is written the scan carries on at
			 * index 1 of what is left; the enclosing loop then
			 * rescans from index 0 until a pass writes nothing.
			 */
			do {
				did_out = 0;
				for (i = 0; (i < line.len) && (i < wrap); ++i) {
					if (line.data[i] == '\n') {
						if (h2a_put_line(&out, line.data, i + 1,
								 do_citaformat) != 0) {
							goto cleanup;
						}
						h2a_drop_front(&line, i + 1);
						i = 0;
						did_out = 1;
					}
				}
			} while (did_out);

			/* Add a soft line break (at most one per input character) */
			if (line.len > wrap) {
				have_brk = 0;
				brk = 0;
				for (i = 0; i < wrap; ++i) {
					if (line.data[i] == ' ') {
						brk = i;
						have_brk = 1;
					}
				}
				if (have_brk) {
					/* Break at the space, which is consumed */
					if (h2a_put_line(&out, line.data, brk,
							 do_citaformat) != 0) {
						goto cleanup;
					}
					h2a_drop_front(&line, brk + 1);
				}
				else {
					/* No space available; cut at the wrap column */
					if (h2a_put_line(&out, line.data, wrap,
							 do_citaformat) != 0) {
						goto cleanup;
					}
					h2a_drop_front(&line, wrap);
				}
			}
		}

	} while (done_reading == 0);

	/* Whatever is still pending goes out as the last line */
	if (h2a_append(&out, line.data, line.len) != 0) {
		goto cleanup;
	}
	if (h2a_append(&out, "\n", 1) != 0) {
		goto cleanup;
	}

	/*
	 * Hand back a block that came straight from mallok(), so the caller
	 * never receives memory that was resized behind mallok()'s back.
	 */
	result = mallok(out.len + 1);
	if (result != NULL) {
		memcpy(result, out.data, out.len + 1);
	}

cleanup:
	free(line.data);
	free(out.data);
	return result;

}
268