* Cleaned up a couple of FIXME's sitting around in the code
[citadel.git] / webcit / html2html.c
1 /*
2  * $Id$
3  *
4  * Output an HTML message, modifying it slightly to make sure it plays nice
5  * with the rest of our web framework.
6  *
7  */
8
9 #include <ctype.h>
10 #include <stdlib.h>
11 #include <unistd.h>
12 #include <stdio.h>
13 #include <fcntl.h>
14 #include <signal.h>
15 #include <sys/types.h>
16 #include <sys/wait.h>
17 #include <sys/socket.h>
18 #include <limits.h>
19 #include <netinet/in.h>
20 #include <netdb.h>
21 #include <string.h>
22 #include <pwd.h>
23 #include <errno.h>
24 #include <stdarg.h>
25 #include <pthread.h>
26 #include <signal.h>
27 #include "webcit.h"
28 #include "vcard.h"
29 #include "webserver.h"
30
31
32 /*
33  */
34 void output_html(void) {
35         char buf[SIZ];
36         char *msg;
37         char *ptr;
38         char *msgstart;
39         char *msgend;
40         char *converted_msg;
41         int buffer_length = 1;
42         int line_length = 0;
43         int content_length = 0;
44         int output_length = 0;
45         char new_window[SIZ];
46         int brak = 0;
47         int i;
48         int linklen;
49
50         msg = strdup("");
51         sprintf(new_window, "<A TARGET=\"%s\" HREF=", TARGET);
52
53         while (serv_gets(buf), strcmp(buf, "000")) {
54                 line_length = strlen(buf);
55                 buffer_length = content_length + line_length + 2;
56                 msg = realloc(msg, buffer_length);
57                 if (msg == NULL) {
58                         wprintf("<B>realloc() error!  "
59                                 "couldn't get %d bytes: %s</B><BR><BR>\n",
60                                 buffer_length + 1,
61                                 strerror(errno));
62                         return;
63                 }
64                 strcpy(&msg[content_length], buf);
65                 content_length += line_length;
66                 strcpy(&msg[content_length], "\n");
67                 content_length += 1;
68         }
69
70         ptr = msg;
71         msgstart = msg;
72         msgend = &msg[content_length];
73
74         while (ptr < msgend) {
75
76                 /* Advance to next tag */
77                 ptr = strchr(ptr, '<');
78                 if ((ptr == NULL) || (ptr >= msgend)) break;
79                 ++ptr;
80                 if ((ptr == NULL) || (ptr >= msgend)) break;
81
82                 /* Any of these tags cause everything up to and including
83                  * the tag to be removed.
84                  */     
85                 if ( (!strncasecmp(ptr, "HTML", 4))
86                    ||(!strncasecmp(ptr, "HEAD", 4))
87                    ||(!strncasecmp(ptr, "/HEAD", 5))
88                    ||(!strncasecmp(ptr, "BODY", 4)) ) {
89                         ptr = strchr(ptr, '>');
90                         if ((ptr == NULL) || (ptr >= msgend)) break;
91                         ++ptr;
92                         if ((ptr == NULL) || (ptr >= msgend)) break;
93                         msgstart = ptr;
94                 }
95
96                 /* Any of these tags cause everything including and following
97                  * the tag to be removed.
98                  */
99                 if ( (!strncasecmp(ptr, "/HTML", 5))
100                    ||(!strncasecmp(ptr, "/BODY", 5)) ) {
101                         --ptr;
102                         msgend = ptr;
103                         strcpy(ptr, "");
104                         
105                 }
106
107                 ++ptr;
108         }
109
110         converted_msg = malloc(content_length);
111         strcpy(converted_msg, "");
112         ptr = msgstart;
113         while (ptr < msgend) {
114                 /* Change mailto: links to WebCit mail, by replacing the
115                  * link with one that points back to our mail room.  Due to
116                  * the way we parse URL's, it'll even handle mailto: links
117                  * that have "?subject=" in them.
118                  */
119                 if (!strncasecmp(ptr, "<A HREF=\"mailto:", 16)) {
120                         content_length += 64;
121                         converted_msg = realloc(converted_msg, content_length);
122                         sprintf(&converted_msg[output_length],
123                                 "<A HREF=\"/display_enter"
124                                 "?force_room=_MAIL_&recp=");
125                         output_length += 47;
126                         ptr = &ptr[16];
127                         ++brak;
128                 }
129                 /* Make links open in a separate window */
130                 else if (!strncasecmp(ptr, "<A HREF=", 8)) {
131                         content_length += 64;
132                         converted_msg = realloc(converted_msg, content_length);
133                         sprintf(&converted_msg[output_length], new_window);
134                         output_length += strlen(new_window);
135                         ptr = &ptr[8];
136                         ++brak;
137                 }
138                 /* Turn anything that looks like a URL into a real link, as long
139                  * as it's not inside a tag already
140                  */
141                 else if ( (brak == 0)
142                      && (!strncasecmp(ptr, "http://", 7))) {
143                                 linklen = 0;
144                                 /* Find the end of the link */
145                                 for (i=0; i<=strlen(ptr); ++i) {
146                                         if ((ptr[i]==0)
147                                            ||(isspace(ptr[i]))
148                                            ||(ptr[i]==10)
149                                            ||(ptr[i]==13)
150                                            ||(ptr[i]==')')
151                                            ||(ptr[i]=='>')
152                                            ||(ptr[i]==']')
153                                         ) linklen = i;
154                                         if (linklen > 0) break;
155                                 }
156                                 if (linklen > 0) {
157                                         content_length += (32 + linklen);
158                                         converted_msg = realloc(converted_msg, content_length);
159                                         sprintf(&converted_msg[output_length], new_window);
160                                         output_length += strlen(new_window);
161                                         converted_msg[output_length] = '\"';
162                                         converted_msg[++output_length] = 0;
163                                         for (i=0; i<linklen; ++i) {
164                                                 converted_msg[output_length] = ptr[i];
165                                                 converted_msg[++output_length] = 0;
166                                         }
167                                         sprintf(&converted_msg[output_length], "\">");
168                                         output_length += 2;
169                                         for (i=0; i<linklen; ++i) {
170                                                 converted_msg[output_length] = *ptr++;
171                                                 converted_msg[++output_length] = 0;
172                                         }
173                                         sprintf(&converted_msg[output_length], "</A>");
174                                         output_length += 4;
175                                 }
176                 }
177                 else {
178                         /*
179                          * We need to know when we're inside a tag,
180                          * so we don't turn things that look like URL's into
181                          * links, when they're already links - or image sources.
182                          */
183                         if (*ptr == '<') ++brak;
184                         if (*ptr == '>') --brak;
185                         converted_msg[output_length] = *ptr++;
186                         converted_msg[++output_length] = 0;
187                 }
188         }
189
190         /* Output our big pile of markup */
191         client_write(converted_msg, output_length);
192
193         /* A little trailing vertical whitespace... */
194         wprintf("<BR><BR>\n");
195
196         /* Now give back the memory */
197         free(converted_msg);
198         free(msg);
199 }
200