removed a bunch of blank comment lines
[citadel.git] / webcit / html2html.c
1 /*
2  * Output an HTML message, modifying it slightly to make sure it plays nice
3  * with the rest of our web framework.
4  *
5  * Copyright (c) 2005-2012 by the citadel.org team
6  *
7  * This program is open source software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License, version 3.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  */
15
16 #include "webcit.h"
17 #include "webserver.h"
18
19
/*
 * Strip one pair of matching surrounding quotes from a string, in place.
 *
 * s	NUL-terminated string to modify.  NULL is ignored.
 *
 * The string is changed only when it is at least two characters long and its
 * first and last characters are both double quotes or both single quotes.
 * Mismatched quotes (e.g. "abc') are left alone.
 */
void stripquotes(char *s)
{
	size_t len;

	if (!s) return;

	len = strlen(s);
	if (len < 2) return;

	if ( ( (s[0] == '\"') && (s[len-1] == '\"') ) || ( (s[0] == '\'') && (s[len-1] == '\'') ) ) {
		/* Drop the closing quote... */
		s[len-1] = 0;
		/*
		 * ...then shift everything (including the new terminator) left by
		 * one to drop the opening quote.  Source and destination overlap,
		 * so this has to be memmove(); strcpy() would be undefined here.
		 */
		memmove(s, &s[1], len - 1);
	}
}
37
38
/*
 * Check to see if a META tag has overridden the declared MIME character set.
 *
 * charset		Character set name; overwritten only when the META tag
 *			carries a "charset=" parameter, otherwise left unchanged.
 *			The caller's buffer must have room for at least 56 bytes
 *			(the value is first cut down to fit the 64 byte work
 *			buffer, from which the 8 byte "charset=" prefix is removed).
 * meta_http_equiv	Content of the "http-equiv" portion of the META tag
 * meta_content		Content of the "content" portion of the META tag
 *
 * Nothing happens if any argument is NULL, if http-equiv is not
 * "Content-type" (case-insensitive), or if the content has no ';' separator.
 */
void extract_charset_from_meta(char *charset, char *meta_http_equiv, char *meta_content)
{
	char *ptr;
	char buf[64];

	if (!charset) return;
	if (!meta_http_equiv) return;
	if (!meta_content) return;

	if (strcasecmp(meta_http_equiv, "Content-type")) return;

	/* The parameter we want follows the media type: "text/html; charset=..." */
	ptr = strchr(meta_content, ';');
	if (!ptr) return;

	safestrncpy(buf, ++ptr, sizeof buf);
	striplt(buf);	/* helper defined elsewhere; presumably trims surrounding whitespace */
	if (!strncasecmp(buf, "charset=", 8)) {
		strcpy(charset, &buf[8]);

		/*
		 * Remove wandering punctuation.  This has to happen before the
		 * alias check below, otherwise a value such as  unicode"  would
		 * not be recognized.
		 */
		if ((ptr=strchr(charset, '\"'))) *ptr = 0;
		striplt(charset);

		/*
		 * The brain-damaged webmail program in Microsoft Exchange declares
		 * a charset of "unicode" when they really mean "UTF-8".  GNU iconv
		 * treats "unicode" as an alias for "UTF-16" so we have to manually
		 * fix this here, otherwise messages generated in Exchange webmail
		 * show up as a big pile of weird characters.
		 */
		if (!strcasecmp(charset, "unicode")) {
			strcpy(charset, "UTF-8");
		}
	}
}
82
83
84
85 /*
86  * Sanitize and enhance an HTML message for display.
87  * Also convert weird character sets to UTF-8 if necessary.
88  * Also fixup img src="cid:..." type inline images to fetch the image
89  *
90  */
91 void output_html(const char *supplied_charset, int treat_as_wiki, int msgnum, StrBuf *Source, StrBuf *Target) {
92         char buf[SIZ];
93         char *msg;
94         char *ptr;
95         char *msgstart;
96         char *msgend;
97         StrBuf *converted_msg;
98         int buffer_length = 1;
99         int line_length = 0;
100         int content_length = 0;
101         char new_window[SIZ];
102         int brak = 0;
103         int alevel = 0;
104         int scriptlevel = 0;
105         int script_start_pos = (-1);
106         int i;
107         int linklen;
108         char charset[128];
109         StrBuf *BodyArea = NULL;
110 #ifdef HAVE_ICONV
111         iconv_t ic = (iconv_t)(-1) ;
112         char *ibuf;                   /* Buffer of characters to be converted */
113         char *obuf;                   /* Buffer for converted characters      */
114         size_t ibuflen;               /* Length of input buffer               */
115         size_t obuflen;               /* Length of output buffer              */
116         char *osav;                   /* Saved pointer to output buffer       */
117 #endif
118         if (Target == NULL)
119                 Target = WC->WBuf;
120
121         safestrncpy(charset, supplied_charset, sizeof charset);
122         msg = strdup("");
123         sprintf(new_window, "<a target=\"%s\" href=", TARGET);
124
125         if (Source == NULL) while (serv_getln(buf, sizeof buf), strcmp(buf, "000")) {
126                 line_length = strlen(buf);
127                 buffer_length = content_length + line_length + 2;
128                 ptr = realloc(msg, buffer_length);
129                 if (ptr == NULL) {
130                         StrBufAppendPrintf(Target, "<b>");
131                         StrBufAppendPrintf(Target, _("realloc() error! couldn't get %d bytes: %s"),
132                                         buffer_length + 1,
133                                         strerror(errno));
134                         StrBufAppendPrintf(Target, "</b><br><br>\n");
135                         while (serv_getln(buf, sizeof buf), strcmp(buf, "000")) {
136                                 /** flush */
137                         }
138                         free(msg);
139                         return;
140                 }
141                 msg = ptr;
142                 strcpy(&msg[content_length], buf);
143                 content_length += line_length;
144                 strcpy(&msg[content_length], "\n");
145                 content_length += 1;
146         }
147         else {
148                 content_length = StrLength(Source);
149                 free(msg);
150                 msg = (char*) ChrPtr(Source);/* TODO: remove cast */
151                 buffer_length = content_length;
152         }
153
154         /** Do a first pass to isolate the message body */
155         ptr = msg + 1;
156         msgstart = msg;
157         msgend = &msg[content_length];
158
159         while (ptr < msgend) {
160
161                 /** Advance to next tag */
162                 ptr = strchr(ptr, '<');
163                 if ((ptr == NULL) || (ptr >= msgend)) break;
164                 ++ptr;
165                 if ((ptr == NULL) || (ptr >= msgend)) break;
166
167                 /*
168                  *  Look for META tags.  Some messages (particularly in
169                  *  Asian locales) illegally declare a message's character
170                  *  set in the HTML instead of in the MIME headers.  This
171                  *  is wrong but we have to work around it anyway.
172                  */
173                 if (!strncasecmp(ptr, "META", 4)) {
174
175                         char *meta_start;
176                         char *meta_end;
177                         int meta_length;
178                         char *meta;
179                         char *meta_http_equiv;
180                         char *meta_content;
181                         char *spaceptr;
182
183                         meta_start = &ptr[4];
184                         meta_end = strchr(ptr, '>');
185                         if ((meta_end != NULL) && (meta_end <= msgend)) {
186                                 meta_length = meta_end - meta_start + 1;
187                                 meta = malloc(meta_length + 1);
188                                 safestrncpy(meta, meta_start, meta_length);
189                                 meta[meta_length] = 0;
190                                 striplt(meta);
191                                 if (!strncasecmp(meta, "HTTP-EQUIV=", 11)) {
192                                         meta_http_equiv = strdup(&meta[11]);
193                                         spaceptr = strchr(meta_http_equiv, ' ');
194                                         if (spaceptr != NULL) {
195                                                 *spaceptr = 0;
196                                                 meta_content = strdup(++spaceptr);
197                                                 if (!strncasecmp(meta_content, "content=", 8)) {
198                                                         strcpy(meta_content, &meta_content[8]);
199                                                         stripquotes(meta_http_equiv);
200                                                         stripquotes(meta_content);
201                                                         extract_charset_from_meta(charset,
202                                                                         meta_http_equiv, meta_content);
203                                                 }
204                                                 free(meta_content);
205                                         }
206                                         free(meta_http_equiv);
207                                 }
208                                 free(meta);
209                         }
210                 }
211
212                 /*
213                  * Any of these tags cause everything up to and including
214                  * the tag to be removed.
215                  */     
216                 if ( (!strncasecmp(ptr, "HTML", 4))
217                                 ||(!strncasecmp(ptr, "HEAD", 4))
218                                 ||(!strncasecmp(ptr, "/HEAD", 5))
219                                 ||(!strncasecmp(ptr, "BODY", 4)) ) {
220                         char *pBody = NULL;
221
222                         if (!strncasecmp(ptr, "BODY", 4)) {
223                                 pBody = ptr;
224                         }
225                         ptr = strchr(ptr, '>');
226                         if ((ptr == NULL) || (ptr >= msgend)) break;
227                         if ((pBody != NULL) && (ptr - pBody > 4)) {
228                                 char* src;
229                                 char *cid_start, *cid_end;
230
231                                 *ptr = '\0';
232                                 pBody += 4; 
233                                 while ((isspace(*pBody)) && (pBody < ptr))
234                                         pBody ++;
235                                 BodyArea = NewStrBufPlain(NULL,  ptr - pBody);
236
237                                 if (pBody < ptr) {
238                                         src = strstr(pBody, "cid:");
239                                         if (src) {
240                                                 cid_start = src + 4;
241                                                 cid_end = cid_start;
242                                                 while ((*cid_end != '"') && 
243                                                                 !isspace(*cid_end) &&
244                                                                 (cid_end < ptr))
245                                                         cid_end ++;
246
247                                                 /* copy tag and attributes up to src="cid: */
248                                                 StrBufAppendBufPlain(BodyArea, pBody, src - pBody, 0);
249
250                                                 /* add in /webcit/mimepart/<msgno>/CID/ 
251                                                    trailing / stops dumb URL filters getting excited */
252                                                 StrBufAppendPrintf(BodyArea,
253                                                                 "/webcit/mimepart/%d/",msgnum);
254                                                 StrBufAppendBufPlain(BodyArea, cid_start, cid_end - cid_start, 0);
255
256                                                 if (ptr - cid_end > 0)
257                                                         StrBufAppendBufPlain(BodyArea, 
258                                                                         cid_end + 1, 
259                                                                         ptr - cid_end, 0);
260                                         }
261                                         else 
262                                                 StrBufAppendBufPlain(BodyArea, pBody, ptr - pBody, 0);
263                                 }
264                                 *ptr = '>';
265                         }
266                         ++ptr;
267                         if ((ptr == NULL) || (ptr >= msgend)) break;
268                         msgstart = ptr;
269                 }
270
271                 /*
272                  * Any of these tags cause everything including and following
273                  * the tag to be removed.
274                  */
275                 if ( (!strncasecmp(ptr, "/HTML", 5))
276                                 ||(!strncasecmp(ptr, "/BODY", 5)) ) {
277                         --ptr;
278                         msgend = ptr;
279                         strcpy(ptr, "");
280
281                 }
282
283                 ++ptr;
284         }
285         if (msgstart > msg) {
286                 strcpy(msg, msgstart);
287         }
288
289         /* Now go through the message, parsing tags as necessary. */
290         converted_msg = NewStrBufPlain(NULL, content_length + 8192);
291
292
293         /** Convert foreign character sets to UTF-8 if necessary. */
294 #ifdef HAVE_ICONV
295         if ( (strcasecmp(charset, "us-ascii"))
296                         && (strcasecmp(charset, "UTF-8"))
297                         && (strcasecmp(charset, ""))
298            ) {
299                 syslog(9, "Converting %s to UTF-8\n", charset);
300                 ctdl_iconv_open("UTF-8", charset, &ic);
301                 if (ic == (iconv_t)(-1) ) {
302                         syslog(5, "%s:%d iconv_open() failed: %s\n",
303                                         __FILE__, __LINE__, strerror(errno));
304                 }
305         }
306         if  (Source == NULL) {
307                 if (ic != (iconv_t)(-1) ) {
308                         ibuf = msg;
309                         ibuflen = content_length;
310                         obuflen = content_length + (content_length / 2) ;
311                         obuf = (char *) malloc(obuflen);
312                         osav = obuf;
313                         iconv(ic, &ibuf, &ibuflen, &obuf, &obuflen);
314                         content_length = content_length + (content_length / 2) - obuflen;
315                         osav[content_length] = 0;
316                         free(msg);
317                         msg = osav;
318                         iconv_close(ic);
319                 }
320         }
321         else {
322                 if (ic != (iconv_t)(-1) ) {
323                         StrBuf *Buf = NewStrBufPlain(NULL, StrLength(Source) + 8096);;
324                         StrBufConvert(Source, Buf, &ic);
325                         FreeStrBuf(&Buf);
326                         iconv_close(ic);
327                         msg = (char*)ChrPtr(Source); /* TODO: get rid of this. */
328                 }
329         }
330
331 #endif
332
333         /*
334          *      At this point, the message has been stripped down to
335          *      only the content inside the <BODY></BODY> tags, and has
336          *      been converted to UTF-8 if it was originally in a foreign
337          *      character set.  The text is also guaranteed to be null
338          *      terminated now.
339          */
340
341         if (converted_msg == NULL) {
342                 StrBufAppendPrintf(Target, "Error %d: %s<br>%s:%d", errno, strerror(errno), __FILE__, __LINE__);
343                 goto BAIL;
344         }
345
346         if (BodyArea != NULL) {
347                 StrBufAppendBufPlain(converted_msg, HKEY("<table "), 0);  
348                 StrBufAppendBuf(converted_msg, BodyArea, 0);
349                 StrBufAppendBufPlain(converted_msg, HKEY(" width=\"100%\"><tr><td>"), 0);
350         }
351         ptr = msg;
352         msgend = strchr(msg, 0);
353         while (ptr < msgend) {
354
355                 /** Try to sanitize the html of any rogue scripts */
356                 if (!strncasecmp(ptr, "<script", 7)) {
357                         if (scriptlevel == 0) {
358                                 script_start_pos = StrLength(converted_msg);
359                         }
360                         ++scriptlevel;
361                 }
362                 if (!strncasecmp(ptr, "</script", 8)) {
363                         --scriptlevel;
364                 }
365
366                 /**
367                  * Change mailto: links to WebCit mail, by replacing the
368                  * link with one that points back to our mail room.  Due to
369                  * the way we parse URL's, it'll even handle mailto: links
370                  * that have "?subject=" in them.
371                  */
372                 if (!strncasecmp(ptr, "<a href=\"mailto:", 16)) {
373                         content_length += 64;
374                         StrBufAppendPrintf(converted_msg,
375                                         "<a href=\"display_enter?force_room=_MAIL_?recp=");
376                         ptr = &ptr[16];
377                         ++alevel;
378                         ++brak;
379                 }
380                 /** Make external links open in a separate window */
381                 else if (!strncasecmp(ptr, "<a href=\"", 9)) {
382                         ++alevel;
383                         ++brak;
384                         if ( ((strchr(ptr, ':') < strchr(ptr, '/')))
385                                         &&  ((strchr(ptr, '/') < strchr(ptr, '>'))) 
386                            ) {
387                                 /* open external links to new window */
388                                 StrBufAppendPrintf(converted_msg, new_window);
389                                 ptr = &ptr[8];
390                         }
391                         else if (
392                                 (treat_as_wiki)
393                                 && (strncasecmp(ptr, "<a href=\"wiki?", 14))
394                                 && (strncasecmp(ptr, "<a href=\"dotgoto?", 17))
395                                 && (strncasecmp(ptr, "<a href=\"knrooms?", 17))
396                         ) {
397                                 content_length += 64;
398                                 StrBufAppendPrintf(converted_msg, "<a href=\"wiki?go=");
399                                 StrBufUrlescAppend(converted_msg, WC->CurRoom.name, NULL);
400                                 StrBufAppendPrintf(converted_msg, "?page=");
401                                 ptr = &ptr[9];
402                         }
403                         else {
404                                 StrBufAppendPrintf(converted_msg, "<a href=\"");
405                                 ptr = &ptr[9];
406                         }
407                 }
408                 /** Fixup <img src="cid:... ...> to fetch the mime part */
409                 else if (!strncasecmp(ptr, "<img ", 5)) {
410                         char *cid_start, *cid_end;
411                         char* tag_end=strchr(ptr,'>');
412                         char* src;
413                         /* FIXME - handle this situation (maybe someone opened an <img cid... 
414                          * and then ended the message)
415                          */
416                         if (!tag_end) {
417                                 syslog(9, "tag_end is null and ptr is:\n");
418                                 syslog(9, "%s\n", ptr);
419                                 syslog(9, "Theoretical bytes remaining: %d\n", (int)(msgend - ptr));
420                         }
421
422                         src=strstr(ptr, "src=\"cid:");
423                         ++brak;
424
425                         if (src
426                             && isspace(*(src-1))
427                                 && tag_end
428                                 && (cid_start=strchr(src,':'))
429                                 && (cid_end=strchr(cid_start,'"'))
430                                 && (cid_end < tag_end)
431                         ) {
432                                 /* copy tag and attributes up to src="cid: */
433                                 StrBufAppendBufPlain(converted_msg, ptr, src - ptr, 0);
434                                 cid_start++;
435
436                                 /* add in /webcit/mimepart/<msgno>/CID/ 
437                                    trailing / stops dumb URL filters getting excited */
438                                 StrBufAppendPrintf(converted_msg,
439                                                 " src=\"/webcit/mimepart/%d/",msgnum);
440                                 StrBufAppendBufPlain(converted_msg, cid_start, cid_end - cid_start, 0);
441                                 StrBufAppendBufPlain(converted_msg, "/\"", -1, 0);
442
443                                 ptr = cid_end+1;
444                         }
445                         StrBufAppendBufPlain(converted_msg, ptr, tag_end - ptr, 0);
446                         ptr = tag_end;
447                 }
448
449                 /**
450                  * Turn anything that looks like a URL into a real link, as long
451                  * as it's not inside a tag already
452                  */
453                 else if ( (brak == 0) && (alevel == 0)
454                      && (!strncasecmp(ptr, "http://", 7))) {
455                                 /** Find the end of the link */
456                                 int strlenptr;
457                                 linklen = 0;
458                                 
459                                 strlenptr = strlen(ptr);
460                                 for (i=0; i<=strlenptr; ++i) {
461                                         if ((ptr[i]==0)
462                                            ||(isspace(ptr[i]))
463                                            ||(ptr[i]==10)
464                                            ||(ptr[i]==13)
465                                            ||(ptr[i]=='(')
466                                            ||(ptr[i]==')')
467                                            ||(ptr[i]=='<')
468                                            ||(ptr[i]=='>')
469                                            ||(ptr[i]=='[')
470                                            ||(ptr[i]==']')
471                                            ||(ptr[i]=='"')
472                                            ||(ptr[i]=='\'')
473                                         ) linklen = i;
474                                         /* did s.b. send us an entity? */
475                                         if (ptr[i] == '&') {
476                                                 if ((ptr[i+2] ==';') ||
477                                                     (ptr[i+3] ==';') ||
478                                                     (ptr[i+5] ==';') ||
479                                                     (ptr[i+6] ==';') ||
480                                                     (ptr[i+7] ==';'))
481                                                         linklen = i;
482                                         }
483                                         if (linklen > 0) break;
484                                 }
485                                 if (linklen > 0) {
486                                         char *ltreviewptr;
487                                         char *nbspreviewptr;
488                                         char linkedchar;
489                                         int len;
490                                         
491                                         len = linklen;
492                                         linkedchar = ptr[len];
493                                         ptr[len] = '\0';
494                                         /* spot for some subject strings tinymce tends to give us. */
495                                         ltreviewptr = strchr(ptr, '<');
496                                         if (ltreviewptr != NULL) {
497                                                 *ltreviewptr = '\0';
498                                                 linklen = ltreviewptr - ptr;
499                                         }
500
501                                         nbspreviewptr = strstr(ptr, "&nbsp;");
502                                         if (nbspreviewptr != NULL) {
503                                                 /* nbspreviewptr = '\0'; */
504                                                 linklen = nbspreviewptr - ptr;
505                                         }
506                                         if (ltreviewptr != 0)
507                                                 *ltreviewptr = '<';
508
509                                         ptr[len] = linkedchar;
510
511                                         content_length += (32 + linklen);
512                                         StrBufAppendPrintf(converted_msg, "%s\"", new_window);
513                                         StrBufAppendBufPlain(converted_msg, ptr, linklen, 0);
514                                         StrBufAppendPrintf(converted_msg, "\">");
515                                         StrBufAppendBufPlain(converted_msg, ptr, linklen, 0);
516                                         ptr += linklen;
517                                         StrBufAppendPrintf(converted_msg, "</A>");
518                                 }
519                 }
520                 else {
521                         StrBufAppendBufPlain(converted_msg, ptr, 1, 0);
522                         ptr++;
523                 }
524
525
526                 if ((ptr >= msg) && (ptr <= msgend)) {
527                         /*
528                          * We need to know when we're inside a tag,
529                          * so we don't turn things that look like URL's into
530                          * links, when they're already links - or image sources.
531                          */
532                         if ((ptr > msg) && (*(ptr-1) == '<')) {
533                                 ++brak;
534                         }
535                         if ((ptr > msg) && (*(ptr-1) == '>')) {
536                                 --brak;
537                                 if ((scriptlevel == 0) && (script_start_pos >= 0)) {
538                                         StrBufCutRight(converted_msg, StrLength(converted_msg) - script_start_pos);
539                                         script_start_pos = (-1);
540                                 }
541                         }
542                         if (!strncasecmp(ptr, "</A>", 3)) --alevel;
543                 }
544         }
545
546         if (BodyArea != NULL) {
547                 StrBufAppendBufPlain(converted_msg, HKEY("</td></tr></table>"), 0);  
548                 FreeStrBuf(&BodyArea);
549         }
550
551         /**     uncomment these two lines to override conversion        */
552         /**     memcpy(converted_msg, msg, content_length);             */
553         /**     output_length = content_length;                         */
554
555         /** Output our big pile of markup */
556         StrBufAppendBuf(Target, converted_msg, 0);
557
558 BAIL:   /** A little trailing vertical whitespace... */
559         StrBufAppendPrintf(Target, "<br><br>\n");
560
561         /** Now give back the memory */
562         FreeStrBuf(&converted_msg);
563         if ((msg != NULL) && (Source == NULL)) free(msg);
564 }
565
566
567
568
569
570
571 /*
572  * Look for URL's embedded in a buffer and make them linkable.  We use a
573  * target window in order to keep the Citadel session in its own window.
574  */
575 void UrlizeText(StrBuf* Target, StrBuf *Source, StrBuf *WrkBuf)
576 {
577         int len, UrlLen, Offset, TrailerLen;
578         const char *start, *end, *pos;
579         
580         FlushStrBuf(Target);
581
582         start = NULL;
583         len = StrLength(Source);
584         end = ChrPtr(Source) + len;
585         for (pos = ChrPtr(Source); (pos < end) && (start == NULL); ++pos) {
586                 if (!strncasecmp(pos, "http://", 7))
587                         start = pos;
588                 else if (!strncasecmp(pos, "ftp://", 6))
589                         start = pos;
590         }
591
592         if (start == NULL) {
593                 StrBufAppendBuf(Target, Source, 0);
594                 return;
595         }
596         FlushStrBuf(WrkBuf);
597
598         for (pos = ChrPtr(Source) + len; pos > start; --pos) {
599                 if (  (!isprint(*pos))
600                    || (isspace(*pos))
601                    || (*pos == '{')
602                    || (*pos == '}')
603                    || (*pos == '|')
604                    || (*pos == '\\')
605                    || (*pos == '^')
606                    || (*pos == '[')
607                    || (*pos == ']')
608                    || (*pos == '`')
609                    || (*pos == '<')
610                    || (*pos == '>')
611                    || (*pos == '(')
612                    || (*pos == ')')
613                 ) {
614                         end = pos;
615                 }
616         }
617         
618         UrlLen = end - start;
619         StrBufAppendBufPlain(WrkBuf, start, UrlLen, 0);
620
621         Offset = start - ChrPtr(Source);
622         if (Offset != 0)
623                 StrBufAppendBufPlain(Target, ChrPtr(Source), Offset, 0);
624         StrBufAppendPrintf(Target, "%ca href=%c%s%c TARGET=%c%s%c%c%s%c/A%c",
625                            LB, QU, ChrPtr(WrkBuf), QU, QU, TARGET, 
626                            QU, RB, ChrPtr(WrkBuf), LB, RB);
627
628         TrailerLen = StrLength(Source) - (end - ChrPtr(Source));
629         if (TrailerLen > 0)
630                 StrBufAppendBufPlain(Target, end, TrailerLen, 0);
631 }
632
633
634 void url(char *buf, size_t bufsize)
635 {
636         int len, UrlLen, Offset, TrailerLen, outpos;
637         char *start, *end, *pos;
638         char urlbuf[SIZ];
639         char outbuf[SIZ];
640
641         start = NULL;
642         len = strlen(buf);
643         if (len > bufsize) {
644                 syslog(1, "URL: content longer than buffer!");
645                 return;
646         }
647         end = buf + len;
648         for (pos = buf; (pos < end) && (start == NULL); ++pos) {
649                 if (!strncasecmp(pos, "http://", 7))
650                         start = pos;
651                 if (!strncasecmp(pos, "ftp://", 6))
652                         start = pos;
653         }
654
655         if (start == NULL)
656                 return;
657
658         for (pos = buf+len; pos > start; --pos) {
659                 if (  (!isprint(*pos))
660                    || (isspace(*pos))
661                    || (*pos == '{')
662                    || (*pos == '}')
663                    || (*pos == '|')
664                    || (*pos == '\\')
665                    || (*pos == '^')
666                    || (*pos == '[')
667                    || (*pos == ']')
668                    || (*pos == '`')
669                    || (*pos == '<')
670                    || (*pos == '>')
671                    || (*pos == '(')
672                    || (*pos == ')')
673                 ) {
674                         end = pos;
675                 }
676         }
677         
678         UrlLen = end - start;
679         if (UrlLen > sizeof(urlbuf)){
680                 syslog(1, "URL: content longer than buffer!");
681                 return;
682         }
683         memcpy(urlbuf, start, UrlLen);
684         urlbuf[UrlLen] = '\0';
685
686         Offset = start - buf;
687         if ((Offset != 0) && (Offset < sizeof(outbuf)))
688                 memcpy(outbuf, buf, Offset);
689         outpos = snprintf(&outbuf[Offset], sizeof(outbuf) - Offset,  
690                           "%ca href=%c%s%c TARGET=%c%s%c%c%s%c/A%c",
691                           LB, QU, urlbuf, QU, QU, TARGET, QU, RB, urlbuf, LB, RB);
692         if (outpos >= sizeof(outbuf) - Offset) {
693                 syslog(1, "URL: content longer than buffer!");
694                 return;
695         }
696
697         TrailerLen = len - (end - start);
698         if (TrailerLen > 0)
699                 memcpy(outbuf + Offset + outpos, end, TrailerLen);
700         if (Offset + outpos + TrailerLen > bufsize) {
701                 syslog(1, "URL: content longer than buffer!");
702                 return;
703         }
704         memcpy (buf, outbuf, Offset + outpos + TrailerLen);
705         *(buf + Offset + outpos + TrailerLen) = '\0';
706 }
707