code.citadel.org Git - citadel.git/blob - citadel/html.c
* HTML updates
[citadel.git] / citadel / html.c
1 /*
2  * html.c -- Functions which handle translation between HTML and plain text
3  * $Id$
4  */
5
6 #include "sysdep.h"
7 #include <stdlib.h>
8 #include <unistd.h>
9 #include <stdio.h>
10 #include <fcntl.h>
11 #include <signal.h>
12 #include <time.h>
13 #include <ctype.h>
14 #include <string.h>
15 #include <errno.h>
16 #include <limits.h>
17 #ifdef HAVE_PTHREAD_H
18 #include <pthread.h>
19 #endif
20 #include <syslog.h>
21 #include "citadel.h"
22 #include "server.h"
23 #include "control.h"
24 #include "sysdep_decls.h"
25 #include "support.h"
26 #include "config.h"
27 #include "msgbase.h"
28 #include "tools.h"
29 #include "room_ops.h"
30 #include "html.h"
31
/*
 * Helpers for html_to_ascii().  The converter is written so that it can run
 * with no destination buffer at all: the first run only measures how long the
 * result will be, the second run writes into a buffer of exactly that size.
 * This removes every fixed-size intermediate buffer and every reallocation.
 */

/* What a recognized tag contributes to the output when its '>' is reached */
enum h2a_break {
	H2A_NONE,		/* nothing */
	H2A_NEWLINE,		/* one line break */
	H2A_PARAGRAPH,		/* two line breaks */
	H2A_RULE		/* a horizontal rule on a line of its own */
};

/* Output cursor shared by the emit helpers below */
struct h2a_state {
	char *buf;		/* destination, or NULL when only measuring */
	size_t len;		/* characters emitted so far */
	size_t line_start;	/* offset of the first character of this line */
	size_t last_space;	/* offset of the latest space on this line */
	int have_space;		/* nonzero while last_space is valid */
	size_t width;		/* longest line allowed; 0 = never wrap */
};

/* Emit one character verbatim, with no wrapping logic applied. */
static void h2a_put(struct h2a_state *st, char c) {
	if (st->buf != NULL) st->buf[st->len] = c;
	++st->len;
}

/* Emit a line break and start tracking a fresh line. */
static void h2a_newline(struct h2a_state *st) {
	h2a_put(st, '\n');
	st->line_start = st->len;
	st->have_space = 0;
}

/* Emit one character of body text, inserting a soft line break if needed. */
static void h2a_text(struct h2a_state *st, char c) {
	if ((st->width > 0) && ((st->len - st->line_start) >= st->width)) {
		if (c == ' ') {
			/* A space landing on the wrap column is the break */
			h2a_newline(st);
			return;
		}
		if (st->have_space) {
			/*
			 * Break at the most recent space.  Everything after
			 * it is already in place as the start of the new
			 * line, and contains no further spaces.
			 */
			if (st->buf != NULL) st->buf[st->last_space] = '\n';
			st->line_start = st->last_space + 1;
			st->have_space = 0;
		} else {
			/* An unbroken run wider than the line: split it */
			h2a_newline(st);
		}
	}
	if (c == ' ') {
		st->last_space = st->len;
		st->have_space = 1;
	}
	h2a_put(st, c);
}

/*
 * Decide what the collected tag text means.  Only the tag name is examined:
 * anything after the first space (attributes) and a trailing '/' (as in
 * "br/") are ignored.  The comparison is case-insensitive.
 */
static enum h2a_break h2a_classify(const char *tag) {
	static const struct {
		const char *name;
		enum h2a_break action;
	} known[] = {
		{ "P",		H2A_PARAGRAPH },
		{ "H1",		H2A_PARAGRAPH },
		{ "H2",		H2A_PARAGRAPH },
		{ "H3",		H2A_PARAGRAPH },
		{ "H4",		H2A_PARAGRAPH },
		{ "/H1",	H2A_NEWLINE },
		{ "/H2",	H2A_NEWLINE },
		{ "/H3",	H2A_NEWLINE },
		{ "/H4",	H2A_NEWLINE },
		{ "HR",		H2A_RULE },
		{ "BR",		H2A_NEWLINE },
		{ "TR",		H2A_NEWLINE },
		{ "/TABLE",	H2A_NEWLINE }
	};
	char name[16];
	size_t n = 0;
	size_t i;

	while ((tag[n] != 0) && (tag[n] != ' ') && (n < sizeof(name) - 1)) {
		name[n] = tag[n];
		++n;
	}
	/* A name that did not fit is longer than any name we know about */
	if ((tag[n] != 0) && (tag[n] != ' ')) return H2A_NONE;
	if ((n > 1) && (name[n - 1] == '/')) --n;
	name[n] = 0;

	for (i = 0; i < sizeof(known) / sizeof(known[0]); ++i) {
		if (!strcasecmp(name, known[i].name)) return known[i].action;
	}
	return H2A_NONE;
}

/*
 * If the text at "in" starts with a supported character entity, store the
 * character it stands for in *decoded and return the entity's length.
 * Returns 0 when there is no match.  "&lb;" and "&rb;" are not standard HTML
 * but are kept because earlier versions of this function accepted them.
 */
static size_t h2a_entity(const char *in, char *decoded) {
	static const struct {
		const char *name;
		char value;
	} entities[] = {
		{ "&nbsp;",	' ' },
		{ "&lt;",	'<' },
		{ "&gt;",	'>' },
		{ "&lb;",	'<' },
		{ "&rb;",	'>' },
		{ "&amp;",	'&' },
		{ "&quot;",	'\"' }
	};
	size_t i;
	size_t n;

	for (i = 0; i < sizeof(entities) / sizeof(entities[0]); ++i) {
		n = strlen(entities[i].name);
		/* strncasecmp stops at the NUL, so a short tail is safe */
		if (!strncasecmp(in, entities[i].name, n)) {
			*decoded = entities[i].value;
			return n;
		}
	}
	return 0;
}

/*
 * Run the conversion.  When buf is NULL nothing is written and the function
 * only reports the length of the result; otherwise buf must have room for
 * that length plus a terminating NUL.  Returns the length, excluding the NUL.
 */
static size_t h2a_convert(const char *in, int screenwidth, char *buf) {
	struct h2a_state st;
	char tag[32];		/* only the tag name is ever inspected */
	size_t tag_len = 0;
	size_t entity_len;
	char decoded;
	unsigned char ch;
	int nest = 0;		/* Bracket nesting level */
	int prev_space = 0;	/* previous kept input character was a space */
	int dash;

	st.buf = buf;
	st.len = 0;
	st.line_start = 0;
	st.last_space = 0;
	st.have_space = 0;
	st.width = (screenwidth > 2) ? (size_t) (screenwidth - 2) : 0;
	tag[0] = 0;

	while (*in != 0) {
		/* unsigned so that bytes above 126 compare as such */
		ch = (unsigned char) *in;

		/* Fold in all the spacing */
		if ((ch == '\n') || (ch == '\r') || (ch == '\t')) ch = ' ';
		if ((ch < 32) || (ch > 126)) {
			++in;
			continue;
		}
		if (ch == ' ') {
			if (prev_space) {
				++in;
				continue;
			}
			prev_space = 1;
		} else {
			prev_space = 0;
		}

		if (ch == '<') {
			++nest;
			tag_len = 0;
			tag[0] = 0;
		}

		else if (ch == '>') {
			if (nest > 0) --nest;

			switch (h2a_classify(tag)) {
			case H2A_PARAGRAPH:
				h2a_newline(&st);
				h2a_newline(&st);
				break;
			case H2A_NEWLINE:
				h2a_newline(&st);
				break;
			case H2A_RULE:
				/*
				 * Written verbatim rather than as wrapped
				 * text so the rule is never split.  It uses
				 * its own counter; the parse position is
				 * not disturbed.
				 */
				h2a_newline(&st);
				h2a_put(&st, ' ');
				for (dash = 0; dash < screenwidth - 2; ++dash)
					h2a_put(&st, '-');
				h2a_newline(&st);
				break;
			case H2A_NONE:
			default:
				break;
			}

			/* Forget the tag so a stray '>' cannot repeat it */
			tag_len = 0;
			tag[0] = 0;
		}

		else if (nest > 0) {
			/* Inside a tag: collect it, silently dropping overflow */
			if (tag_len < sizeof(tag) - 1) {
				tag[tag_len++] = (char) ch;
				tag[tag_len] = 0;
			}
		}

		else {
			/* Convert &; tags to the forbidden characters */
			entity_len = (ch == '&') ? h2a_entity(in, &decoded) : 0;
			if (entity_len > 0) {
				h2a_text(&st, decoded);
				in += entity_len;
				continue;
			}
			h2a_text(&st, (char) ch);
		}

		++in;
	}

	/* The result always ends with a line break */
	h2a_put(&st, '\n');
	if (buf != NULL) buf[st.len] = 0;
	return st.len;
}

/*
 * Convert HTML to plain text.
 *
 * inputmsg	NUL-terminated HTML source.  It is read, never modified.
 * screenwidth	Width of the display in columns.  Body text is wrapped so
 *		that no line of it is longer than screenwidth-2 characters,
 *		breaking at a space where one is available.  A value below 3
 *		turns wrapping off.
 *
 * Translation rules:
 *  - CR, LF and TAB become spaces, runs of spaces collapse to one, and any
 *    other byte outside the range 32..126 is discarded.
 *  - Everything between '<' and '>' is removed.  P and H1-H4 produce a blank
 *    line; /H1-/H4, BR, TR and /TABLE produce a line break; HR produces a
 *    space followed by screenwidth-2 dashes on a line of its own.
 *  - &nbsp; &lt; &gt; &amp; &quot; (and the legacy &lb; &rb;) are decoded.
 *  - A line break is always appended at the end.
 *
 * Returns a NUL-terminated string obtained from mallok(), which the caller
 * owns and must release, or NULL if inputmsg is NULL or memory could not be
 * allocated.
 */
char *html_to_ascii(char *inputmsg, int screenwidth) {
	char *outputbuf;
	size_t output_length;

	if (inputmsg == NULL) return NULL;

	/* Cast: the format says %d, and strlen() returns a size_t */
	lprintf(9, "Decoding %d bytes of HTML\n", (int) strlen(inputmsg));

	/* First pass measures, second pass fills a buffer of that exact size */
	output_length = h2a_convert(inputmsg, screenwidth, NULL);
	outputbuf = mallok(output_length + 1);
	if (outputbuf == NULL) return NULL;
	h2a_convert(inputmsg, screenwidth, outputbuf);

	return outputbuf;
}