* calendar_view.c event.c floors.c graphics.c html2html.c iconbar.c: i18n
[citadel.git] / webcit / html2html.c
1 /*
2  * $Id$
3  *
4  * Output an HTML message, modifying it slightly to make sure it plays nice
5  * with the rest of our web framework.
6  *
7  */
8
9 #include "webcit.h"
10 #include "vcard.h"
11 #include "webserver.h"
12
13
14 /*
15  * Sanitize and enhance an HTML message for display.
16  * Also convert weird character sets to UTF-8 if necessary.
17  */
18 void output_html(char *charset) {
19         char buf[SIZ];
20         char *msg;
21         char *ptr;
22         char *msgstart;
23         char *msgend;
24         char *converted_msg;
25         int buffer_length = 1;
26         int line_length = 0;
27         int content_length = 0;
28         int output_length = 0;
29         char new_window[SIZ];
30         int brak = 0;
31         int alevel = 0;
32         int i;
33         int linklen;
34 #ifdef HAVE_ICONV
35         iconv_t ic = (iconv_t)(-1) ;
36         char *ibuf;                   /* Buffer of characters to be converted */
37         char *obuf;                   /* Buffer for converted characters      */
38         size_t ibuflen;               /* Length of input buffer               */
39         size_t obuflen;               /* Length of output buffer              */
40         char *osav;                   /* Saved pointer to output buffer       */
41 #endif
42
43         msg = strdup("");
44         sprintf(new_window, "<A TARGET=\"%s\" HREF=", TARGET);
45
46         while (serv_getln(buf, sizeof buf), strcmp(buf, "000")) {
47                 line_length = strlen(buf);
48                 buffer_length = content_length + line_length + 2;
49                 msg = realloc(msg, buffer_length);
50                 if (msg == NULL) {
51                         wprintf("<B>");
52                         wprintf(_("realloc() error! couldn't get %d bytes: %s"),
53                                 buffer_length + 1,
54                                 strerror(errno));
55                         wprintf("</B><br /><br />\n");
56                         return;
57                 }
58                 strcpy(&msg[content_length], buf);
59                 content_length += line_length;
60                 strcpy(&msg[content_length], "\n");
61                 content_length += 1;
62         }
63
64 #ifdef HAVE_ICONV
65         if ( (strcasecmp(charset, "us-ascii"))
66            && (strcasecmp(charset, "UTF-8")) ) {
67                 ic = iconv_open("UTF-8", charset);
68                 if (ic == (iconv_t)(-1) ) {
69                         lprintf(5, "iconv_open() failed: %s\n", strerror(errno));
70                 }
71         }
72         if (ic != (iconv_t)(-1) ) {
73                 ibuf = msg;
74                 ibuflen = content_length;
75                 obuflen = content_length + (content_length / 2) ;
76                 obuf = (char *) malloc(obuflen);
77                 osav = obuf;
78                 iconv(ic, &ibuf, &ibuflen, &obuf, &obuflen);
79                 content_length = content_length + (content_length / 2) - obuflen;
80                 osav[content_length] = 0;
81                 free(msg);
82                 msg = osav;
83                 iconv_close(ic);
84         }
85 #endif
86
87         ptr = msg;
88         msgstart = msg;
89         msgend = &msg[content_length];
90
91         while (ptr < msgend) {
92
93                 /* Advance to next tag */
94                 ptr = strchr(ptr, '<');
95                 if ((ptr == NULL) || (ptr >= msgend)) break;
96                 ++ptr;
97                 if ((ptr == NULL) || (ptr >= msgend)) break;
98
99                 /* Any of these tags cause everything up to and including
100                  * the tag to be removed.
101                  */     
102                 if ( (!strncasecmp(ptr, "HTML", 4))
103                    ||(!strncasecmp(ptr, "HEAD", 4))
104                    ||(!strncasecmp(ptr, "/HEAD", 5))
105                    ||(!strncasecmp(ptr, "BODY", 4)) ) {
106                         ptr = strchr(ptr, '>');
107                         if ((ptr == NULL) || (ptr >= msgend)) break;
108                         ++ptr;
109                         if ((ptr == NULL) || (ptr >= msgend)) break;
110                         msgstart = ptr;
111                 }
112
113                 /* Any of these tags cause everything including and following
114                  * the tag to be removed.
115                  */
116                 if ( (!strncasecmp(ptr, "/HTML", 5))
117                    ||(!strncasecmp(ptr, "/BODY", 5)) ) {
118                         --ptr;
119                         msgend = ptr;
120                         strcpy(ptr, "");
121                         
122                 }
123
124                 ++ptr;
125         }
126
127         converted_msg = malloc(content_length);
128         strcpy(converted_msg, "");
129         ptr = msgstart;
130         while (ptr < msgend) {
131                 /* Change mailto: links to WebCit mail, by replacing the
132                  * link with one that points back to our mail room.  Due to
133                  * the way we parse URL's, it'll even handle mailto: links
134                  * that have "?subject=" in them.
135                  */
136                 if (!strncasecmp(ptr, "<A HREF=\"mailto:", 16)) {
137                         content_length += 64;
138                         converted_msg = realloc(converted_msg, content_length);
139                         sprintf(&converted_msg[output_length],
140                                 "<A HREF=\"/display_enter"
141                                 "?force_room=_MAIL_&recp=");
142                         output_length += 47;
143                         ptr = &ptr[16];
144                         ++alevel;
145                 }
146                 /* Make links open in a separate window */
147                 else if (!strncasecmp(ptr, "<A HREF=", 8)) {
148                         content_length += 64;
149                         converted_msg = realloc(converted_msg, content_length);
150                         sprintf(&converted_msg[output_length], new_window);
151                         output_length += strlen(new_window);
152                         ptr = &ptr[8];
153                         ++alevel;
154                 }
155                 /* Turn anything that looks like a URL into a real link, as long
156                  * as it's not inside a tag already
157                  */
158                 else if ( (brak == 0) && (alevel == 0)
159                      && (!strncasecmp(ptr, "http://", 7))) {
160                                 linklen = 0;
161                                 /* Find the end of the link */
162                                 for (i=0; i<=strlen(ptr); ++i) {
163                                         if ((ptr[i]==0)
164                                            ||(isspace(ptr[i]))
165                                            ||(ptr[i]==10)
166                                            ||(ptr[i]==13)
167                                            ||(ptr[i]=='(')
168                                            ||(ptr[i]==')')
169                                            ||(ptr[i]=='<')
170                                            ||(ptr[i]=='>')
171                                            ||(ptr[i]=='[')
172                                            ||(ptr[i]==']')
173                                         ) linklen = i;
174                                         if (linklen > 0) break;
175                                 }
176                                 if (linklen > 0) {
177                                         content_length += (32 + linklen);
178                                         converted_msg = realloc(converted_msg, content_length);
179                                         sprintf(&converted_msg[output_length], new_window);
180                                         output_length += strlen(new_window);
181                                         converted_msg[output_length] = '\"';
182                                         converted_msg[++output_length] = 0;
183                                         for (i=0; i<linklen; ++i) {
184                                                 converted_msg[output_length] = ptr[i];
185                                                 converted_msg[++output_length] = 0;
186                                         }
187                                         sprintf(&converted_msg[output_length], "\">");
188                                         output_length += 2;
189                                         for (i=0; i<linklen; ++i) {
190                                                 converted_msg[output_length] = *ptr++;
191                                                 converted_msg[++output_length] = 0;
192                                         }
193                                         sprintf(&converted_msg[output_length], "</A>");
194                                         output_length += 4;
195                                 }
196                 }
197                 else {
198                         /*
199                          * We need to know when we're inside a tag,
200                          * so we don't turn things that look like URL's into
201                          * links, when they're already links - or image sources.
202                          */
203                         if (*ptr == '<') ++brak;
204                         if (*ptr == '>') --brak;
205                         if (!strncasecmp(ptr, "</A>", 3)) --alevel;
206                         converted_msg[output_length] = *ptr++;
207                         converted_msg[++output_length] = 0;
208                 }
209         }
210
211         /* Output our big pile of markup */
212         client_write(converted_msg, output_length);
213
214         /* A little trailing vertical whitespace... */
215         wprintf("<br /><br />\n");
216
217         /* Now give back the memory */
218         free(converted_msg);
219         free(msg);
220 }
221