* Removed all of the absolute URLs.
[citadel.git] / webcit / html2html.c
1 /*
2  * $Id$
3  *
4  * Output an HTML message, modifying it slightly to make sure it plays nice
5  * with the rest of our web framework.
6  *
7  */
8
9 #include "webcit.h"
10 #include "vcard.h"
11 #include "webserver.h"
12
13
14 /*
15  * Sanitize and enhance an HTML message for display.
16  * Also convert weird character sets to UTF-8 if necessary.
17  */
18 void output_html(char *charset) {
19         char buf[SIZ];
20         char *msg;
21         char *ptr;
22         char *msgstart;
23         char *msgend;
24         char *converted_msg;
25         int buffer_length = 1;
26         int line_length = 0;
27         int content_length = 0;
28         int output_length = 0;
29         char new_window[SIZ];
30         int brak = 0;
31         int alevel = 0;
32         int i;
33         int linklen;
34 #ifdef HAVE_ICONV
35         iconv_t ic = (iconv_t)(-1) ;
36         char *ibuf;                   /* Buffer of characters to be converted */
37         char *obuf;                   /* Buffer for converted characters      */
38         size_t ibuflen;               /* Length of input buffer               */
39         size_t obuflen;               /* Length of output buffer              */
40         char *osav;                   /* Saved pointer to output buffer       */
41 #endif
42
43         msg = strdup("");
44         sprintf(new_window, "<a target=\"%s\" href=", TARGET);
45
46         while (serv_getln(buf, sizeof buf), strcmp(buf, "000")) {
47                 line_length = strlen(buf);
48                 buffer_length = content_length + line_length + 2;
49                 msg = realloc(msg, buffer_length);
50                 if (msg == NULL) {
51                         wprintf("<b>");
52                         wprintf(_("realloc() error! couldn't get %d bytes: %s"),
53                                 buffer_length + 1,
54                                 strerror(errno));
55                         wprintf("</b><br /><br />\n");
56                         return;
57                 }
58                 strcpy(&msg[content_length], buf);
59                 content_length += line_length;
60                 strcpy(&msg[content_length], "\n");
61                 content_length += 1;
62         }
63
64 #ifdef HAVE_ICONV
65         if ( (strcasecmp(charset, "us-ascii"))
66            && (strcasecmp(charset, "UTF-8"))
67            && (strcasecmp(charset, ""))
68         ) {
69                 ic = iconv_open("UTF-8", charset);
70                 if (ic == (iconv_t)(-1) ) {
71                         lprintf(5, "%s:%d iconv_open() failed: %s\n",
72                                 __FILE__, __LINE__, strerror(errno));
73                 }
74         }
75         if (ic != (iconv_t)(-1) ) {
76                 ibuf = msg;
77                 ibuflen = content_length;
78                 obuflen = content_length + (content_length / 2) ;
79                 obuf = (char *) malloc(obuflen);
80                 osav = obuf;
81                 iconv(ic, &ibuf, &ibuflen, &obuf, &obuflen);
82                 content_length = content_length + (content_length / 2) - obuflen;
83                 osav[content_length] = 0;
84                 free(msg);
85                 msg = osav;
86                 iconv_close(ic);
87         }
88 #endif
89
90         ptr = msg;
91         msgstart = msg;
92         msgend = &msg[content_length];
93
94         while (ptr < msgend) {
95
96                 /* Advance to next tag */
97                 ptr = strchr(ptr, '<');
98                 if ((ptr == NULL) || (ptr >= msgend)) break;
99                 ++ptr;
100                 if ((ptr == NULL) || (ptr >= msgend)) break;
101
102                 /* Any of these tags cause everything up to and including
103                  * the tag to be removed.
104                  */     
105                 if ( (!strncasecmp(ptr, "HTML", 4))
106                    ||(!strncasecmp(ptr, "HEAD", 4))
107                    ||(!strncasecmp(ptr, "/HEAD", 5))
108                    ||(!strncasecmp(ptr, "BODY", 4)) ) {
109                         ptr = strchr(ptr, '>');
110                         if ((ptr == NULL) || (ptr >= msgend)) break;
111                         ++ptr;
112                         if ((ptr == NULL) || (ptr >= msgend)) break;
113                         msgstart = ptr;
114                 }
115
116                 /* Any of these tags cause everything including and following
117                  * the tag to be removed.
118                  */
119                 if ( (!strncasecmp(ptr, "/HTML", 5))
120                    ||(!strncasecmp(ptr, "/BODY", 5)) ) {
121                         --ptr;
122                         msgend = ptr;
123                         strcpy(ptr, "");
124                         
125                 }
126
127                 ++ptr;
128         }
129
130         converted_msg = malloc(content_length);
131         strcpy(converted_msg, "");
132         ptr = msgstart;
133         while (ptr < msgend) {
134                 /* Change mailto: links to WebCit mail, by replacing the
135                  * link with one that points back to our mail room.  Due to
136                  * the way we parse URL's, it'll even handle mailto: links
137                  * that have "?subject=" in them.
138                  */
139                 if (!strncasecmp(ptr, "<a href=\"mailto:", 16)) {
140                         content_length += 64;
141                         converted_msg = realloc(converted_msg, content_length);
142                         sprintf(&converted_msg[output_length],
143                                 "<a href=\"display_enter"
144                                 "?force_room=_MAIL_&recp=");
145                         output_length += 47;
146                         ptr = &ptr[16];
147                         ++alevel;
148                 }
149                 /* Make links open in a separate window */
150                 else if (!strncasecmp(ptr, "<a href=", 8)) {
151                         content_length += 64;
152                         converted_msg = realloc(converted_msg, content_length);
153                         sprintf(&converted_msg[output_length], new_window);
154                         output_length += strlen(new_window);
155                         ptr = &ptr[8];
156                         ++alevel;
157                 }
158                 /* Turn anything that looks like a URL into a real link, as long
159                  * as it's not inside a tag already
160                  */
161                 else if ( (brak == 0) && (alevel == 0)
162                      && (!strncasecmp(ptr, "http://", 7))) {
163                                 linklen = 0;
164                                 /* Find the end of the link */
165                                 for (i=0; i<=strlen(ptr); ++i) {
166                                         if ((ptr[i]==0)
167                                            ||(isspace(ptr[i]))
168                                            ||(ptr[i]==10)
169                                            ||(ptr[i]==13)
170                                            ||(ptr[i]=='(')
171                                            ||(ptr[i]==')')
172                                            ||(ptr[i]=='<')
173                                            ||(ptr[i]=='>')
174                                            ||(ptr[i]=='[')
175                                            ||(ptr[i]==']')
176                                         ) linklen = i;
177                                         if (linklen > 0) break;
178                                 }
179                                 if (linklen > 0) {
180                                         content_length += (32 + linklen);
181                                         converted_msg = realloc(converted_msg, content_length);
182                                         sprintf(&converted_msg[output_length], new_window);
183                                         output_length += strlen(new_window);
184                                         converted_msg[output_length] = '\"';
185                                         converted_msg[++output_length] = 0;
186                                         for (i=0; i<linklen; ++i) {
187                                                 converted_msg[output_length] = ptr[i];
188                                                 converted_msg[++output_length] = 0;
189                                         }
190                                         sprintf(&converted_msg[output_length], "\">");
191                                         output_length += 2;
192                                         for (i=0; i<linklen; ++i) {
193                                                 converted_msg[output_length] = *ptr++;
194                                                 converted_msg[++output_length] = 0;
195                                         }
196                                         sprintf(&converted_msg[output_length], "</A>");
197                                         output_length += 4;
198                                 }
199                 }
200                 else {
201                         /*
202                          * We need to know when we're inside a tag,
203                          * so we don't turn things that look like URL's into
204                          * links, when they're already links - or image sources.
205                          */
206                         if (*ptr == '<') ++brak;
207                         if (*ptr == '>') --brak;
208                         if (!strncasecmp(ptr, "</A>", 3)) --alevel;
209                         converted_msg[output_length] = *ptr++;
210                         converted_msg[++output_length] = 0;
211                 }
212         }
213
214         /* Output our big pile of markup */
215         client_write(converted_msg, output_length);
216
217         /* A little trailing vertical whitespace... */
218         wprintf("<br /><br />\n");
219
220         /* Now give back the memory */
221         free(converted_msg);
222         free(msg);
223 }
224