]> code.citadel.org Git - citadel.git/blob - citadel/html.c
* HTML now works.
[citadel.git] / citadel / html.c
1 /*
2  * html.c -- Functions which handle translation between HTML and plain text
3  * $Id$
4  */
5
6 #include "sysdep.h"
7 #include <stdlib.h>
8 #include <unistd.h>
9 #include <stdio.h>
10 #include <fcntl.h>
11 #include <signal.h>
12 #include <time.h>
13 #include <ctype.h>
14 #include <string.h>
15 #include <errno.h>
16 #include <limits.h>
17 #ifdef HAVE_PTHREAD_H
18 #include <pthread.h>
19 #endif
20 #include <syslog.h>
21 #include "citadel.h"
22 #include "server.h"
23 #include "control.h"
24 #include "sysdep_decls.h"
25 #include "support.h"
26 #include "config.h"
27 #include "msgbase.h"
28 #include "tools.h"
29 #include "room_ops.h"
30 #include "html.h"
31  
32
/*
 * Helpers for html_to_ascii(): a growable output buffer plus the small
 * lookup tables that drive tag and entity translation.
 */

#define H2A_NONE ((size_t)-1)	/* "no entity in progress" value for amp */

struct h2a_out {
	char *buf;		/* heap buffer; NUL-terminated only on return */
	size_t cap;		/* bytes allocated for buf */
	size_t committed;	/* buf[0..committed) holds finished lines */
	size_t total;		/* buf[committed..total) is the pending line */
	size_t amp;		/* offset of the latest literal '&', or H2A_NONE */
};

struct h2a_entity {
	const char *name;	/* entity as written in the HTML */
	const char *text;	/* plain text it turns into (never longer) */
};

static const struct h2a_entity h2a_entities[] = {
	{ "&nbsp;", " " },
	{ "&lt;", "<" },
	{ "&gt;", ">" },
	{ "&amp;", "&" },
	{ "&quot;", "\"" },
	{ "&copy;", "(c)" },
	{ "&reg;", "(r)" }
};

/* Tags that start a new paragraph (blank line). */
static const char *const h2a_para_tags[] = { "P", "H1", "H2", "H3", "H4" };

/* Tags that merely end the current line. */
static const char *const h2a_line_tags[] = {
	"/H1", "/H2", "/H3", "/H4", "BR", "TR", "/TABLE"
};

/*
 * Make sure 'extra' more bytes plus a terminating NUL fit into the buffer.
 * Returns 0 on success.  On failure the buffer is released, o->buf is set
 * to NULL and -1 is returned.
 *
 * NOTE(review): the buffer is obtained with mallok() but resized with
 * realloc() (as this function always did) and released with free() on
 * failure; confirm that mallok() memory is compatible with both.
 */
static int h2a_reserve(struct h2a_out *o, size_t extra) {
	size_t need, newcap;
	char *bigger;

	if (extra >= (size_t)-1 - o->total) goto fail;	/* would overflow */
	need = o->total + extra + 1;
	if (need <= o->cap) return 0;

	/* Grow geometrically so that appending stays linear overall. */
	newcap = o->cap;
	while (newcap < need) {
		if (newcap > (size_t)-1 / 2) {
			newcap = need;
			break;
		}
		newcap *= 2;
	}

	/* Keep the old pointer until realloc() is known to have worked. */
	bigger = realloc(o->buf, newcap);
	if (bigger == NULL) goto fail;
	o->buf = bigger;
	o->cap = newcap;
	return 0;

fail:
	free(o->buf);
	o->buf = NULL;
	return -1;
}

/*
 * Append n bytes to the pending line.  Returns 0, or -1 when out of memory
 * (in which case the buffer has already been released).
 */
static int h2a_put(struct h2a_out *o, const char *text, size_t n) {
	if (h2a_reserve(o, n) != 0) return -1;
	memcpy(o->buf + o->total, text, n);
	o->total += n;
	return 0;
}

/*
 * Called right after a ';' has been appended: if the text from the latest
 * '&' up to that ';' is a known entity (compared without regard to case),
 * replace it by its plain text form.  Only the tail of the pending line is
 * examined, so the '&' produced by "&amp;" is never decoded a second time.
 */
static void h2a_decode_entity(struct h2a_out *o) {
	size_t i, len, namelen, textlen;

	/* No '&' seen, or it already left the pending line through a break */
	if (o->amp == H2A_NONE || o->amp < o->committed) return;

	len = o->total - o->amp;
	for (i = 0; i < sizeof(h2a_entities) / sizeof(h2a_entities[0]); ++i) {
		namelen = strlen(h2a_entities[i].name);
		if (len != namelen) continue;
		if (strncasecmp(o->buf + o->amp, h2a_entities[i].name, namelen))
			continue;
		textlen = strlen(h2a_entities[i].text);
		memcpy(o->buf + o->amp, h2a_entities[i].text, textlen);
		o->total = o->amp + textlen;
		return;
	}
}

/*
 * Return the line break text a tag produces, or NULL if the tag produces
 * none.  The whole tag text must equal the tag name (case is ignored), so
 * tags carrying attributes are not recognized.
 */
static const char *h2a_tag_break(const char *tag) {
	size_t i;

	for (i = 0; i < sizeof(h2a_para_tags) / sizeof(h2a_para_tags[0]); ++i) {
		if (!strcasecmp(tag, h2a_para_tags[i])) return "\n\n";
	}
	for (i = 0; i < sizeof(h2a_line_tags) / sizeof(h2a_line_tags[0]); ++i) {
		if (!strcasecmp(tag, h2a_line_tags[i])) return "\n";
	}
	return NULL;
}

/*
 * Move as much of the pending line as possible into the finished output:
 *  - a newline within the first 'width' columns ends a line as it stands;
 *  - otherwise a pending line longer than 'width' is broken at the last
 *    blank within the first 'width' columns (the blank becomes the
 *    newline), or, if there is no blank, after exactly 'width' columns.
 * A width of 0 disables wrapping.  Returns 0, or -1 when out of memory.
 */
static int h2a_wrap(struct h2a_out *o, size_t width) {
	if (width == 0) return 0;

	for (;;) {
		char *line = o->buf + o->committed;
		size_t len = o->total - o->committed;
		size_t scan = (len < width) ? len : width;
		char *nl = memchr(line, '\n', scan);
		size_t brk, pos;

		/* Hard line break */
		if (nl != NULL) {
			o->committed += (size_t)(nl - line) + 1;
			continue;
		}

		if (len <= width) return 0;

		/* Soft line break: look for the last blank that still fits */
		brk = width;
		while (brk > 0 && line[brk - 1] != ' ') --brk;

		if (brk > 0) {
			line[brk - 1] = '\n';
			o->committed += brk;
			continue;
		}

		/* No blank at all: cut the word and insert a newline */
		pos = o->committed + width;
		if (h2a_reserve(o, 1) != 0) return -1;
		memmove(o->buf + pos + 1, o->buf + pos, o->total - pos);
		o->buf[pos] = '\n';
		++o->total;
		if (o->amp != H2A_NONE && o->amp >= pos) ++o->amp;
		o->committed = pos + 1;
	}
}

/*
 * Convert HTML to plain text.
 *
 * inputmsg	NUL-terminated HTML text; it is only read.
 * screenwidth	Display width in columns.  Output lines are wrapped at
 *		(screenwidth - 2) columns; a value of 2 or less disables
 *		wrapping (and makes <HR> draw no dashes).
 *
 * Returns a newly allocated, NUL-terminated string that always ends in a
 * newline, or NULL if inputmsg is NULL or memory could not be obtained.
 * The buffer is allocated with mallok(); releasing it is left to the
 * caller (NOTE(review): confirm which deallocator callers use).
 *
 * Translation rules:
 *  - CR, LF and TAB count as blanks, runs of blanks in the input shrink to
 *    a single blank, and every other byte outside 32..126 is dropped.
 *  - Everything between '<' and '>' is a tag and is not copied.  P and
 *    H1..H4 produce a blank line; /H1../H4, BR, TR and /TABLE produce a
 *    line break; HR produces a rule of (screenwidth - 2) dashes.  All
 *    other tags produce nothing.  A '>' outside of any tag is discarded.
 *  - &nbsp; &lt; &gt; &amp; &quot; &copy; and &reg; are decoded once.
 */
char *html_to_ascii(char *inputmsg, int screenwidth) {
	struct h2a_out out;
	char tag[1024];
	size_t taglen = 0;
	size_t width;
	int nest = 0;		/* Bracket nesting level */
	int prev_blank = 0;	/* Was the previous input char a blank? */
	const unsigned char *in;

	if (inputmsg == NULL) return NULL;

	width = (screenwidth > 2) ? (size_t)(screenwidth - 2) : 0;

	out.cap = strlen(inputmsg) + 256;
	out.buf = mallok(out.cap);
	if (out.buf == NULL) return NULL;
	out.committed = 0;
	out.total = 0;
	out.amp = H2A_NONE;
	tag[0] = 0;

	/*
	 * Bytes are read as unsigned char so that only a real NUL ends the
	 * input; 8-bit characters are filtered out below instead.
	 */
	for (in = (const unsigned char *)inputmsg; *in != 0; ++in) {
		int ch = *in;

		/* Fold in all the spacing */
		if (ch == 10 || ch == 13 || ch == 9) ch = 32;
		if (ch < 32 || ch > 126) continue;
		if (ch == 32) {
			if (prev_blank) continue;
			prev_blank = 1;
		} else {
			prev_blank = 0;
		}

		if (ch == '<') {
			++nest;
			taglen = 0;
			tag[0] = 0;
			continue;
		}

		if (ch == '>') {
			const char *brk;

			/* Not inside a tag: there is nothing to act upon */
			if (nest == 0) continue;
			--nest;

			if (!strcasecmp(tag, "HR")) {
				if (h2a_reserve(&out, width + 3) != 0)
					return NULL;
				out.buf[out.total++] = '\n';
				out.buf[out.total++] = ' ';
				memset(out.buf + out.total, '-', width);
				out.total += width;
				out.buf[out.total++] = '\n';
			} else if ((brk = h2a_tag_break(tag)) != NULL) {
				if (h2a_put(&out, brk, strlen(brk)) != 0)
					return NULL;
			}

			/* Each tag is acted upon only once */
			taglen = 0;
			tag[0] = 0;
		}

		else if (nest > 0) {
			/* Collect the tag text; overlong tags are truncated */
			if (taglen < sizeof(tag) - 1) {
				tag[taglen++] = (char)ch;
				tag[taglen] = 0;
			}
			continue;
		}

		else {
			char c = (char)ch;

			if (c == '&') out.amp = out.total;
			if (h2a_put(&out, &c, 1) != 0) return NULL;

			/* Convert &; tags to the forbidden characters */
			if (c == ';') {
				h2a_decode_entity(&out);
				out.amp = H2A_NONE;
			}
		}

		/* Output hard line breaks and add soft ones */
		if (h2a_wrap(&out, width) != 0) return NULL;
	}

	/* Whatever is still pending becomes the last line */
	if (h2a_put(&out, "\n", 1) != 0) return NULL;
	out.buf[out.total] = 0;	/* h2a_reserve() kept room for the NUL */

	return out.buf;
}
262