* More changes required to be able to do embedded message/rfc822 and still be
[citadel.git] / webcit / html2html.c
1 /*
2  * $Id$
3  *
4  * Output an HTML message, modifying it slightly to make sure it plays nice
5  * with the rest of our web framework.
6  *
7  */
8
9 #include "webcit.h"
10 #include "vcard.h"
11 #include "webserver.h"
12
13
14 /*
15  * Sanitize and enhance an HTML message for display.
16  * Also convert weird character sets to UTF-8 if necessary.
17  */
18 void output_html(char *charset) {
19         char buf[SIZ];
20         char *msg;
21         char *ptr;
22         char *msgstart;
23         char *msgend;
24         char *converted_msg;
25         int buffer_length = 1;
26         int line_length = 0;
27         int content_length = 0;
28         int output_length = 0;
29         char new_window[SIZ];
30         int brak = 0;
31         int alevel = 0;
32         int i;
33         int linklen;
34 #ifdef HAVE_ICONV
35         iconv_t ic = (iconv_t)(-1) ;
36         char *ibuf;                   /* Buffer of characters to be converted */
37         char *obuf;                   /* Buffer for converted characters      */
38         size_t ibuflen;               /* Length of input buffer               */
39         size_t obuflen;               /* Length of output buffer              */
40         char *osav;                   /* Saved pointer to output buffer       */
41 #endif
42
43         msg = strdup("");
44         sprintf(new_window, "<A TARGET=\"%s\" HREF=", TARGET);
45
46         while (serv_getln(buf, sizeof buf), strcmp(buf, "000")) {
47                 line_length = strlen(buf);
48                 buffer_length = content_length + line_length + 2;
49                 msg = realloc(msg, buffer_length);
50                 if (msg == NULL) {
51                         wprintf("<B>");
52                         wprintf(_("realloc() error! couldn't get %d bytes: %s"),
53                                 buffer_length + 1,
54                                 strerror(errno));
55                         wprintf("</B><br /><br />\n");
56                         return;
57                 }
58                 strcpy(&msg[content_length], buf);
59                 content_length += line_length;
60                 strcpy(&msg[content_length], "\n");
61                 content_length += 1;
62         }
63
64 #ifdef HAVE_ICONV
65         if ( (strcasecmp(charset, "us-ascii"))
66            && (strcasecmp(charset, "UTF-8"))
67            && (strcasecmp(charset, ""))
68         ) {
69                 ic = iconv_open("UTF-8", charset);
70                 if (ic == (iconv_t)(-1) ) {
71                         lprintf(5, "%s:%d iconv_open() failed: %s\n", __FILE__, __LINE__, strerror(errno));
72                 }
73         }
74         if (ic != (iconv_t)(-1) ) {
75                 ibuf = msg;
76                 ibuflen = content_length;
77                 obuflen = content_length + (content_length / 2) ;
78                 obuf = (char *) malloc(obuflen);
79                 osav = obuf;
80                 iconv(ic, &ibuf, &ibuflen, &obuf, &obuflen);
81                 content_length = content_length + (content_length / 2) - obuflen;
82                 osav[content_length] = 0;
83                 free(msg);
84                 msg = osav;
85                 iconv_close(ic);
86         }
87 #endif
88
89         ptr = msg;
90         msgstart = msg;
91         msgend = &msg[content_length];
92
93         while (ptr < msgend) {
94
95                 /* Advance to next tag */
96                 ptr = strchr(ptr, '<');
97                 if ((ptr == NULL) || (ptr >= msgend)) break;
98                 ++ptr;
99                 if ((ptr == NULL) || (ptr >= msgend)) break;
100
101                 /* Any of these tags cause everything up to and including
102                  * the tag to be removed.
103                  */     
104                 if ( (!strncasecmp(ptr, "HTML", 4))
105                    ||(!strncasecmp(ptr, "HEAD", 4))
106                    ||(!strncasecmp(ptr, "/HEAD", 5))
107                    ||(!strncasecmp(ptr, "BODY", 4)) ) {
108                         ptr = strchr(ptr, '>');
109                         if ((ptr == NULL) || (ptr >= msgend)) break;
110                         ++ptr;
111                         if ((ptr == NULL) || (ptr >= msgend)) break;
112                         msgstart = ptr;
113                 }
114
115                 /* Any of these tags cause everything including and following
116                  * the tag to be removed.
117                  */
118                 if ( (!strncasecmp(ptr, "/HTML", 5))
119                    ||(!strncasecmp(ptr, "/BODY", 5)) ) {
120                         --ptr;
121                         msgend = ptr;
122                         strcpy(ptr, "");
123                         
124                 }
125
126                 ++ptr;
127         }
128
129         converted_msg = malloc(content_length);
130         strcpy(converted_msg, "");
131         ptr = msgstart;
132         while (ptr < msgend) {
133                 /* Change mailto: links to WebCit mail, by replacing the
134                  * link with one that points back to our mail room.  Due to
135                  * the way we parse URL's, it'll even handle mailto: links
136                  * that have "?subject=" in them.
137                  */
138                 if (!strncasecmp(ptr, "<A HREF=\"mailto:", 16)) {
139                         content_length += 64;
140                         converted_msg = realloc(converted_msg, content_length);
141                         sprintf(&converted_msg[output_length],
142                                 "<A HREF=\"/display_enter"
143                                 "?force_room=_MAIL_&recp=");
144                         output_length += 47;
145                         ptr = &ptr[16];
146                         ++alevel;
147                 }
148                 /* Make links open in a separate window */
149                 else if (!strncasecmp(ptr, "<A HREF=", 8)) {
150                         content_length += 64;
151                         converted_msg = realloc(converted_msg, content_length);
152                         sprintf(&converted_msg[output_length], new_window);
153                         output_length += strlen(new_window);
154                         ptr = &ptr[8];
155                         ++alevel;
156                 }
157                 /* Turn anything that looks like a URL into a real link, as long
158                  * as it's not inside a tag already
159                  */
160                 else if ( (brak == 0) && (alevel == 0)
161                      && (!strncasecmp(ptr, "http://", 7))) {
162                                 linklen = 0;
163                                 /* Find the end of the link */
164                                 for (i=0; i<=strlen(ptr); ++i) {
165                                         if ((ptr[i]==0)
166                                            ||(isspace(ptr[i]))
167                                            ||(ptr[i]==10)
168                                            ||(ptr[i]==13)
169                                            ||(ptr[i]=='(')
170                                            ||(ptr[i]==')')
171                                            ||(ptr[i]=='<')
172                                            ||(ptr[i]=='>')
173                                            ||(ptr[i]=='[')
174                                            ||(ptr[i]==']')
175                                         ) linklen = i;
176                                         if (linklen > 0) break;
177                                 }
178                                 if (linklen > 0) {
179                                         content_length += (32 + linklen);
180                                         converted_msg = realloc(converted_msg, content_length);
181                                         sprintf(&converted_msg[output_length], new_window);
182                                         output_length += strlen(new_window);
183                                         converted_msg[output_length] = '\"';
184                                         converted_msg[++output_length] = 0;
185                                         for (i=0; i<linklen; ++i) {
186                                                 converted_msg[output_length] = ptr[i];
187                                                 converted_msg[++output_length] = 0;
188                                         }
189                                         sprintf(&converted_msg[output_length], "\">");
190                                         output_length += 2;
191                                         for (i=0; i<linklen; ++i) {
192                                                 converted_msg[output_length] = *ptr++;
193                                                 converted_msg[++output_length] = 0;
194                                         }
195                                         sprintf(&converted_msg[output_length], "</A>");
196                                         output_length += 4;
197                                 }
198                 }
199                 else {
200                         /*
201                          * We need to know when we're inside a tag,
202                          * so we don't turn things that look like URL's into
203                          * links, when they're already links - or image sources.
204                          */
205                         if (*ptr == '<') ++brak;
206                         if (*ptr == '>') --brak;
207                         if (!strncasecmp(ptr, "</A>", 3)) --alevel;
208                         converted_msg[output_length] = *ptr++;
209                         converted_msg[++output_length] = 0;
210                 }
211         }
212
213         /* Output our big pile of markup */
214         client_write(converted_msg, output_length);
215
216         /* A little trailing vertical whitespace... */
217         wprintf("<br /><br />\n");
218
219         /* Now give back the memory */
220         free(converted_msg);
221         free(msg);
222 }
223