* Completed SSL support. Still doesn't work with all browsers... gotta
[citadel.git] / webcit / html2html.c
1 /*
2  * $Id$
3  *
4  * Output an HTML message, modifying it slightly to make sure it plays nice
5  * with the rest of our web framework.
6  *
7  */
8
9 #include <ctype.h>
10 #include <stdlib.h>
11 #include <unistd.h>
12 #include <stdio.h>
13 #include <fcntl.h>
14 #include <signal.h>
15 #include <sys/types.h>
16 #include <sys/wait.h>
17 #include <sys/socket.h>
18 #include <limits.h>
19 #include <netinet/in.h>
20 #include <netdb.h>
21 #include <string.h>
22 #include <pwd.h>
23 #include <errno.h>
24 #include <stdarg.h>
25 #include <pthread.h>
26 #include <signal.h>
27 #include "webcit.h"
28 #include "vcard.h"
29 #include "webserver.h"
30
31
32 /*
33  */
34 void output_html(void) {
35         char buf[SIZ];
36         char *msg;
37         char *ptr;
38         char *msgstart;
39         char *msgend;
40         char *converted_msg;
41         int buffer_length = 1;
42         int line_length = 0;
43         int content_length = 0;
44         int output_length = 0;
45         char new_window[SIZ];
46         int brak = 0;
47         int in_link = 0;
48         int i;
49         int linklen;
50
51         msg = strdup("");
52         sprintf(new_window, "<A TARGET=\"%s\" HREF=", TARGET);
53
54         while (serv_gets(buf), strcmp(buf, "000")) {
55                 line_length = strlen(buf);
56                 buffer_length = content_length + line_length + 2;
57                 msg = realloc(msg, buffer_length);
58                 if (msg == NULL) {
59                         wprintf("<B>realloc() error!  "
60                                 "couldn't get %d bytes: %s</B><BR><BR>\n",
61                                 buffer_length + 1,
62                                 strerror(errno));
63                         return;
64                 }
65                 strcpy(&msg[content_length], buf);
66                 content_length += line_length;
67                 strcpy(&msg[content_length], "\n");
68                 content_length += 1;
69         }
70
71         ptr = msg;
72         msgstart = msg;
73         msgend = &msg[content_length];
74
75         while (ptr < msgend) {
76
77                 /* Advance to next tag */
78                 ptr = strchr(ptr, '<');
79                 if ((ptr == NULL) || (ptr >= msgend)) break;
80                 ++ptr;
81                 if ((ptr == NULL) || (ptr >= msgend)) break;
82
83                 /* Any of these tags cause everything up to and including
84                  * the tag to be removed.
85                  */     
86                 if ( (!strncasecmp(ptr, "HTML", 4))
87                    ||(!strncasecmp(ptr, "HEAD", 4))
88                    ||(!strncasecmp(ptr, "/HEAD", 5))
89                    ||(!strncasecmp(ptr, "BODY", 4)) ) {
90                         ptr = strchr(ptr, '>');
91                         if ((ptr == NULL) || (ptr >= msgend)) break;
92                         ++ptr;
93                         if ((ptr == NULL) || (ptr >= msgend)) break;
94                         msgstart = ptr;
95                 }
96
97                 /* Any of these tags cause everything including and following
98                  * the tag to be removed.
99                  */
100                 if ( (!strncasecmp(ptr, "/HTML", 5))
101                    ||(!strncasecmp(ptr, "/BODY", 5)) ) {
102                         --ptr;
103                         msgend = ptr;
104                         strcpy(ptr, "");
105                         
106                 }
107
108                 ++ptr;
109         }
110
111         converted_msg = malloc(content_length);
112         strcpy(converted_msg, "");
113         ptr = msgstart;
114         while (ptr < msgend) {
115                 /* Change mailto: links to WebCit mail, by replacing the
116                  * link with one that points back to our mail room.  Due to
117                  * the way we parse URL's, it'll even handle mailto: links
118                  * that have "?subject=" in them.
119                  */
120                 if (!strncasecmp(ptr, "<A HREF=\"mailto:", 16)) {
121                         content_length += 64;
122                         converted_msg = realloc(converted_msg, content_length);
123                         sprintf(&converted_msg[output_length],
124                                 "<A HREF=\"/display_enter"
125                                 "?force_room=_MAIL_&recp=");
126                         output_length += 47;
127                         ptr = &ptr[16];
128                         ++brak;
129                 }
130                 /* Make links open in a separate window */
131                 else if (!strncasecmp(ptr, "<A HREF=", 8)) {
132                         content_length += 64;
133                         converted_msg = realloc(converted_msg, content_length);
134                         sprintf(&converted_msg[output_length], new_window);
135                         output_length += strlen(new_window);
136                         ptr = &ptr[8];
137                         ++brak;
138                 }
139                 /* Turn anything that looks like a URL into a real link */
140                 else if ( (in_link == 0)
141                      && (!strncasecmp(ptr, "http://", 7))) {
142                                 linklen = 0;
143                                 /* Find the end of the link */
144                                 for (i=0; i<=strlen(ptr); ++i) {
145                                         if ((ptr[i]==0)
146                                            ||(isspace(ptr[i]))
147                                            ||(ptr[i]==10)
148                                            ||(ptr[i]==13)
149                                            ||(ptr[i]==')')
150                                            ||(ptr[i]=='>')
151                                            ||(ptr[i]==']')
152                                         ) linklen = i;
153                                         if (linklen > 0) break;
154                                 }
155                                 if (linklen > 0) {
156                                         content_length += (32 + linklen);
157                                         converted_msg = realloc(converted_msg, content_length);
158                                         sprintf(&converted_msg[output_length], new_window);
159                                         output_length += strlen(new_window);
160                                         converted_msg[output_length] = '\"';
161                                         converted_msg[++output_length] = 0;
162                                         for (i=0; i<linklen; ++i) {
163                                                 converted_msg[output_length] = ptr[i];
164                                                 converted_msg[++output_length] = 0;
165                                         }
166                                         sprintf(&converted_msg[output_length], "\">");
167                                         output_length += 2;
168                                         for (i=0; i<linklen; ++i) {
169                                                 converted_msg[output_length] = *ptr++;
170                                                 converted_msg[++output_length] = 0;
171                                         }
172                                         sprintf(&converted_msg[output_length], "</A>");
173                                         output_length += 4;
174                                 }
175                 }
176                 else {
177                         /*
178                          * We need to know when we're inside a pair of <A>...</A>
179                          * tags, so we don't turn things that look like URL's into
180                          * links, when they're already links.
181                          */
182                         if (!strncasecmp(ptr, "<A ", 3)) {
183                                 ++in_link;
184                         }
185                         if (!strncasecmp(ptr, "</A", 3)) {
186                                 --in_link;
187                         }
188                         if (*ptr == '<') ++brak;
189                         if (*ptr == '>') --brak;
190                         converted_msg[output_length] = *ptr++;
191                         converted_msg[++output_length] = 0;
192                 }
193         }
194
195         /* Output our big pile of markup */
196         client_write(converted_msg, output_length);
197
198         /* A little trailing vertical whitespace... */
199         wprintf("<BR><BR>\n");
200
201         /* Now give back the memory */
202         free(converted_msg);
203         free(msg);
204 }
205