* All OS-level includes are now included from webcit.h instead of from
[citadel.git] / webcit / html2html.c
1 /*
2  * $Id$
3  *
4  * Output an HTML message, modifying it slightly to make sure it plays nice
5  * with the rest of our web framework.
6  *
7  */
8
9 #include "webcit.h"
10 #include "vcard.h"
11 #include "webserver.h"
12
13
14 /*
15  * Sanitize and enhance an HTML message for display.
16  * Also convert weird character sets to UTF-8 if necessary.
17  */
18 void output_html(char *charset) {
19         char buf[SIZ];
20         char *msg;
21         char *ptr;
22         char *msgstart;
23         char *msgend;
24         char *converted_msg;
25         int buffer_length = 1;
26         int line_length = 0;
27         int content_length = 0;
28         int output_length = 0;
29         char new_window[SIZ];
30         int brak = 0;
31         int alevel = 0;
32         int i;
33         int linklen;
34 #ifdef HAVE_ICONV
35         iconv_t ic = (iconv_t)(-1) ;
36         char *ibuf;                   /* Buffer of characters to be converted */
37         char *obuf;                   /* Buffer for converted characters      */
38         size_t ibuflen;               /* Length of input buffer               */
39         size_t obuflen;               /* Length of output buffer              */
40         char *osav;                   /* Saved pointer to output buffer       */
41 #endif
42
43         msg = strdup("");
44         sprintf(new_window, "<A TARGET=\"%s\" HREF=", TARGET);
45
46         while (serv_getln(buf, sizeof buf), strcmp(buf, "000")) {
47                 line_length = strlen(buf);
48                 buffer_length = content_length + line_length + 2;
49                 msg = realloc(msg, buffer_length);
50                 if (msg == NULL) {
51                         wprintf("<B>realloc() error!  "
52                                 "couldn't get %d bytes: %s</B><br /><br />\n",
53                                 buffer_length + 1,
54                                 strerror(errno));
55                         return;
56                 }
57                 strcpy(&msg[content_length], buf);
58                 content_length += line_length;
59                 strcpy(&msg[content_length], "\n");
60                 content_length += 1;
61         }
62
63 #ifdef HAVE_ICONV
64         if ( (strcasecmp(charset, "us-ascii"))
65            && (strcasecmp(charset, "UTF-8")) ) {
66                 ic = iconv_open("UTF-8", charset);
67                 if (ic == (iconv_t)(-1) ) {
68                         lprintf(5, "iconv_open() failed: %s\n", strerror(errno));
69                 }
70         }
71         if (ic != (iconv_t)(-1) ) {
72                 ibuf = msg;
73                 ibuflen = content_length;
74                 obuflen = content_length + (content_length / 2) ;
75                 obuf = (char *) malloc(obuflen);
76                 osav = obuf;
77                 iconv(ic, &ibuf, &ibuflen, &obuf, &obuflen);
78                 content_length = content_length + (content_length / 2) - obuflen;
79                 osav[content_length] = 0;
80                 free(msg);
81                 msg = osav;
82                 iconv_close(ic);
83         }
84 #endif
85
86         ptr = msg;
87         msgstart = msg;
88         msgend = &msg[content_length];
89
90         while (ptr < msgend) {
91
92                 /* Advance to next tag */
93                 ptr = strchr(ptr, '<');
94                 if ((ptr == NULL) || (ptr >= msgend)) break;
95                 ++ptr;
96                 if ((ptr == NULL) || (ptr >= msgend)) break;
97
98                 /* Any of these tags cause everything up to and including
99                  * the tag to be removed.
100                  */     
101                 if ( (!strncasecmp(ptr, "HTML", 4))
102                    ||(!strncasecmp(ptr, "HEAD", 4))
103                    ||(!strncasecmp(ptr, "/HEAD", 5))
104                    ||(!strncasecmp(ptr, "BODY", 4)) ) {
105                         ptr = strchr(ptr, '>');
106                         if ((ptr == NULL) || (ptr >= msgend)) break;
107                         ++ptr;
108                         if ((ptr == NULL) || (ptr >= msgend)) break;
109                         msgstart = ptr;
110                 }
111
112                 /* Any of these tags cause everything including and following
113                  * the tag to be removed.
114                  */
115                 if ( (!strncasecmp(ptr, "/HTML", 5))
116                    ||(!strncasecmp(ptr, "/BODY", 5)) ) {
117                         --ptr;
118                         msgend = ptr;
119                         strcpy(ptr, "");
120                         
121                 }
122
123                 ++ptr;
124         }
125
126         converted_msg = malloc(content_length);
127         strcpy(converted_msg, "");
128         ptr = msgstart;
129         while (ptr < msgend) {
130                 /* Change mailto: links to WebCit mail, by replacing the
131                  * link with one that points back to our mail room.  Due to
132                  * the way we parse URL's, it'll even handle mailto: links
133                  * that have "?subject=" in them.
134                  */
135                 if (!strncasecmp(ptr, "<A HREF=\"mailto:", 16)) {
136                         content_length += 64;
137                         converted_msg = realloc(converted_msg, content_length);
138                         sprintf(&converted_msg[output_length],
139                                 "<A HREF=\"/display_enter"
140                                 "?force_room=_MAIL_&recp=");
141                         output_length += 47;
142                         ptr = &ptr[16];
143                         ++alevel;
144                 }
145                 /* Make links open in a separate window */
146                 else if (!strncasecmp(ptr, "<A HREF=", 8)) {
147                         content_length += 64;
148                         converted_msg = realloc(converted_msg, content_length);
149                         sprintf(&converted_msg[output_length], new_window);
150                         output_length += strlen(new_window);
151                         ptr = &ptr[8];
152                         ++alevel;
153                 }
154                 /* Turn anything that looks like a URL into a real link, as long
155                  * as it's not inside a tag already
156                  */
157                 else if ( (brak == 0) && (alevel == 0)
158                      && (!strncasecmp(ptr, "http://", 7))) {
159                                 linklen = 0;
160                                 /* Find the end of the link */
161                                 for (i=0; i<=strlen(ptr); ++i) {
162                                         if ((ptr[i]==0)
163                                            ||(isspace(ptr[i]))
164                                            ||(ptr[i]==10)
165                                            ||(ptr[i]==13)
166                                            ||(ptr[i]=='(')
167                                            ||(ptr[i]==')')
168                                            ||(ptr[i]=='<')
169                                            ||(ptr[i]=='>')
170                                            ||(ptr[i]=='[')
171                                            ||(ptr[i]==']')
172                                         ) linklen = i;
173                                         if (linklen > 0) break;
174                                 }
175                                 if (linklen > 0) {
176                                         content_length += (32 + linklen);
177                                         converted_msg = realloc(converted_msg, content_length);
178                                         sprintf(&converted_msg[output_length], new_window);
179                                         output_length += strlen(new_window);
180                                         converted_msg[output_length] = '\"';
181                                         converted_msg[++output_length] = 0;
182                                         for (i=0; i<linklen; ++i) {
183                                                 converted_msg[output_length] = ptr[i];
184                                                 converted_msg[++output_length] = 0;
185                                         }
186                                         sprintf(&converted_msg[output_length], "\">");
187                                         output_length += 2;
188                                         for (i=0; i<linklen; ++i) {
189                                                 converted_msg[output_length] = *ptr++;
190                                                 converted_msg[++output_length] = 0;
191                                         }
192                                         sprintf(&converted_msg[output_length], "</A>");
193                                         output_length += 4;
194                                 }
195                 }
196                 else {
197                         /*
198                          * We need to know when we're inside a tag,
199                          * so we don't turn things that look like URL's into
200                          * links, when they're already links - or image sources.
201                          */
202                         if (*ptr == '<') ++brak;
203                         if (*ptr == '>') --brak;
204                         if (!strncasecmp(ptr, "</A>", 3)) --alevel;
205                         converted_msg[output_length] = *ptr++;
206                         converted_msg[++output_length] = 0;
207                 }
208         }
209
210         /* Output our big pile of markup */
211         client_write(converted_msg, output_length);
212
213         /* A little trailing vertical whitespace... */
214         wprintf("<br /><br />\n");
215
216         /* Now give back the memory */
217         free(converted_msg);
218         free(msg);
219 }
220