* Replaced serv_gets() with serv_getln() - which now requires the caller
[citadel.git] / webcit / html2html.c
1 /*
2  * $Id$
3  *
4  * Output an HTML message, modifying it slightly to make sure it plays nice
5  * with the rest of our web framework.
6  *
7  */
8
9 #include <ctype.h>
10 #include <stdlib.h>
11 #include <unistd.h>
12 #include <stdio.h>
13 #include <fcntl.h>
14 #include <signal.h>
15 #include <sys/types.h>
16 #include <sys/wait.h>
17 #include <sys/socket.h>
18 #include <limits.h>
19 #include <netinet/in.h>
20 #include <netdb.h>
21 #include <string.h>
22 #include <pwd.h>
23 #include <errno.h>
24 #include <stdarg.h>
25 #include <pthread.h>
26 #include <signal.h>
27 #include "webcit.h"
28 #include "vcard.h"
29 #include "webserver.h"
30
31
32 /*
33  */
34 void output_html(void) {
35         char buf[SIZ];
36         char *msg;
37         char *ptr;
38         char *msgstart;
39         char *msgend;
40         char *converted_msg;
41         int buffer_length = 1;
42         int line_length = 0;
43         int content_length = 0;
44         int output_length = 0;
45         char new_window[SIZ];
46         int brak = 0;
47         int alevel = 0;
48         int i;
49         int linklen;
50
51         msg = strdup("");
52         sprintf(new_window, "<A TARGET=\"%s\" HREF=", TARGET);
53
54         while (serv_getln(buf, sizeof buf), strcmp(buf, "000")) {
55                 line_length = strlen(buf);
56                 buffer_length = content_length + line_length + 2;
57                 msg = realloc(msg, buffer_length);
58                 if (msg == NULL) {
59                         wprintf("<B>realloc() error!  "
60                                 "couldn't get %d bytes: %s</B><br /><br />\n",
61                                 buffer_length + 1,
62                                 strerror(errno));
63                         return;
64                 }
65                 strcpy(&msg[content_length], buf);
66                 content_length += line_length;
67                 strcpy(&msg[content_length], "\n");
68                 content_length += 1;
69         }
70
71         ptr = msg;
72         msgstart = msg;
73         msgend = &msg[content_length];
74
75         while (ptr < msgend) {
76
77                 /* Advance to next tag */
78                 ptr = strchr(ptr, '<');
79                 if ((ptr == NULL) || (ptr >= msgend)) break;
80                 ++ptr;
81                 if ((ptr == NULL) || (ptr >= msgend)) break;
82
83                 /* Any of these tags cause everything up to and including
84                  * the tag to be removed.
85                  */     
86                 if ( (!strncasecmp(ptr, "HTML", 4))
87                    ||(!strncasecmp(ptr, "HEAD", 4))
88                    ||(!strncasecmp(ptr, "/HEAD", 5))
89                    ||(!strncasecmp(ptr, "BODY", 4)) ) {
90                         ptr = strchr(ptr, '>');
91                         if ((ptr == NULL) || (ptr >= msgend)) break;
92                         ++ptr;
93                         if ((ptr == NULL) || (ptr >= msgend)) break;
94                         msgstart = ptr;
95                 }
96
97                 /* Any of these tags cause everything including and following
98                  * the tag to be removed.
99                  */
100                 if ( (!strncasecmp(ptr, "/HTML", 5))
101                    ||(!strncasecmp(ptr, "/BODY", 5)) ) {
102                         --ptr;
103                         msgend = ptr;
104                         strcpy(ptr, "");
105                         
106                 }
107
108                 ++ptr;
109         }
110
111         converted_msg = malloc(content_length);
112         strcpy(converted_msg, "");
113         ptr = msgstart;
114         while (ptr < msgend) {
115                 /* Change mailto: links to WebCit mail, by replacing the
116                  * link with one that points back to our mail room.  Due to
117                  * the way we parse URL's, it'll even handle mailto: links
118                  * that have "?subject=" in them.
119                  */
120                 if (!strncasecmp(ptr, "<A HREF=\"mailto:", 16)) {
121                         content_length += 64;
122                         converted_msg = realloc(converted_msg, content_length);
123                         sprintf(&converted_msg[output_length],
124                                 "<A HREF=\"/display_enter"
125                                 "?force_room=_MAIL_&recp=");
126                         output_length += 47;
127                         ptr = &ptr[16];
128                         ++alevel;
129                 }
130                 /* Make links open in a separate window */
131                 else if (!strncasecmp(ptr, "<A HREF=", 8)) {
132                         content_length += 64;
133                         converted_msg = realloc(converted_msg, content_length);
134                         sprintf(&converted_msg[output_length], new_window);
135                         output_length += strlen(new_window);
136                         ptr = &ptr[8];
137                         ++alevel;
138                 }
139                 /* Turn anything that looks like a URL into a real link, as long
140                  * as it's not inside a tag already
141                  */
142                 else if ( (brak == 0) && (alevel == 0)
143                      && (!strncasecmp(ptr, "http://", 7))) {
144                                 linklen = 0;
145                                 /* Find the end of the link */
146                                 for (i=0; i<=strlen(ptr); ++i) {
147                                         if ((ptr[i]==0)
148                                            ||(isspace(ptr[i]))
149                                            ||(ptr[i]==10)
150                                            ||(ptr[i]==13)
151                                            ||(ptr[i]=='(')
152                                            ||(ptr[i]==')')
153                                            ||(ptr[i]=='<')
154                                            ||(ptr[i]=='>')
155                                            ||(ptr[i]=='[')
156                                            ||(ptr[i]==']')
157                                         ) linklen = i;
158                                         if (linklen > 0) break;
159                                 }
160                                 if (linklen > 0) {
161                                         content_length += (32 + linklen);
162                                         converted_msg = realloc(converted_msg, content_length);
163                                         sprintf(&converted_msg[output_length], new_window);
164                                         output_length += strlen(new_window);
165                                         converted_msg[output_length] = '\"';
166                                         converted_msg[++output_length] = 0;
167                                         for (i=0; i<linklen; ++i) {
168                                                 converted_msg[output_length] = ptr[i];
169                                                 converted_msg[++output_length] = 0;
170                                         }
171                                         sprintf(&converted_msg[output_length], "\">");
172                                         output_length += 2;
173                                         for (i=0; i<linklen; ++i) {
174                                                 converted_msg[output_length] = *ptr++;
175                                                 converted_msg[++output_length] = 0;
176                                         }
177                                         sprintf(&converted_msg[output_length], "</A>");
178                                         output_length += 4;
179                                 }
180                 }
181                 else {
182                         /*
183                          * We need to know when we're inside a tag,
184                          * so we don't turn things that look like URL's into
185                          * links, when they're already links - or image sources.
186                          */
187                         if (*ptr == '<') ++brak;
188                         if (*ptr == '>') --brak;
189                         if (!strncasecmp(ptr, "</A>", 3)) --alevel;
190                         converted_msg[output_length] = *ptr++;
191                         converted_msg[++output_length] = 0;
192                 }
193         }
194
195         /* Output our big pile of markup */
196         client_write(converted_msg, output_length);
197
198         /* A little trailing vertical whitespace... */
199         wprintf("<br /><br />\n");
200
201         /* Now give back the memory */
202         free(converted_msg);
203         free(msg);
204 }
205