4185b1fedae980fc58db27c7feec74ddb6c1ec24
[citadel.git] / webcit / html2html.c
1 /*
2  * $Id$
3  *
4  * Output an HTML message, modifying it slightly to make sure it plays nice
5  * with the rest of our web framework.
6  *
7  * Copyright (c) 2005-2010 by the citadel.org team
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 3 of the License, or
12  * (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  */
22
23 #include "webcit.h"
24 #include "webserver.h"
25
26
/*
 * Strip one pair of surrounding quotes from a string, in place.
 *
 * s	NUL-terminated string to modify (a NULL pointer is ignored)
 *
 * The string is changed only when it is at least two characters long and
 * both its first and last characters are the same kind of quote (either
 * both '"' or both '\'').  Mismatched or unbalanced quotes are left alone.
 */
void stripquotes(char *s)
{
	size_t len;

	if (!s) return;

	len = strlen(s);
	if (len < 2) return;

	if ( ((s[0] == '\"') && (s[len-1] == '\"'))
	   || ((s[0] == '\'') && (s[len-1] == '\'')) ) {
		/* Drop the closing quote ... */
		s[len-1] = 0;
		/*
		 * ... then slide the remainder (len-2 characters plus the
		 * terminator) down over the opening quote.  Source and
		 * destination overlap, so this has to be memmove(), not strcpy().
		 */
		memmove(s, &s[1], len - 1);
	}
}
44
45
/*
 * Check to see if a META tag has overridden the declared MIME character set.
 *
 * charset		Character set name; overwritten only when the META tag
 *			supplies a non-empty replacement.  The value written is
 *			always shorter than 56 bytes including the terminator,
 *			so the caller's buffer must be at least that large.
 * meta_http_equiv	Content of the "http-equiv" portion of the META tag
 * meta_content		Content of the "content" portion of the META tag
 *
 * Nothing happens unless meta_http_equiv is "Content-type" (compared
 * case-insensitively) and the text after the first ';' in meta_content
 * begins with "charset=".
 */
void extract_charset_from_meta(char *charset, char *meta_http_equiv, char *meta_content)
{
	char *ptr;
	char *value;
	char buf[64];

	if (!charset) return;
	if (!meta_http_equiv) return;
	if (!meta_content) return;

	if (strcasecmp(meta_http_equiv, "Content-type")) return;

	ptr = strchr(meta_content, ';');
	if (!ptr) return;

	safestrncpy(buf, ++ptr, sizeof buf);
	striplt(buf);
	if (strncasecmp(buf, "charset=", 8)) return;

	/*
	 * Remove wandering punctuation *before* looking at the name.  A
	 * self-closing tag such as  content="text/html; charset=unicode" />
	 * reaches us with a trailing quote still attached, and the alias
	 * check below would never match if we compared first.
	 */
	value = &buf[8];
	if ((ptr = strchr(value, '\"'))) *ptr = 0;
	striplt(value);

	/* An empty declaration tells us nothing; keep the MIME charset. */
	if (*value == 0) return;

	/*
	 * The brain-damaged webmail program in Microsoft Exchange declares
	 * a charset of "unicode" when they really mean "UTF-8".  GNU iconv
	 * treats "unicode" as an alias for "UTF-16" so we have to manually
	 * fix this here, otherwise messages generated in Exchange webmail
	 * show up as a big pile of weird characters.
	 */
	if (!strcasecmp(value, "unicode")) {
		strcpy(charset, "UTF-8");
	}
	else {
		/* value lives inside buf[64], so at most 55 characters + NUL */
		strcpy(charset, value);
	}
}
89
90
91
92 /*
93  * Sanitize and enhance an HTML message for display.
94  * Also convert weird character sets to UTF-8 if necessary.
95  * Also fixup img src="cid:..." type inline images to fetch the image
96  *
97  */
98 void output_html(const char *supplied_charset, int treat_as_wiki, int msgnum, StrBuf *Source, StrBuf *Target) {
99         char buf[SIZ];
100         char *msg;
101         char *ptr;
102         char *msgstart;
103         char *msgend;
104         StrBuf *converted_msg;
105         int buffer_length = 1;
106         int line_length = 0;
107         int content_length = 0;
108         char new_window[SIZ];
109         int brak = 0;
110         int alevel = 0;
111         int scriptlevel = 0;
112         int script_start_pos = (-1);
113         int i;
114         int linklen;
115         char charset[128];
116         StrBuf *BodyArea = NULL;
117 #ifdef HAVE_ICONV
118         iconv_t ic = (iconv_t)(-1) ;
119         char *ibuf;                   /* Buffer of characters to be converted */
120         char *obuf;                   /* Buffer for converted characters      */
121         size_t ibuflen;               /* Length of input buffer               */
122         size_t obuflen;               /* Length of output buffer              */
123         char *osav;                   /* Saved pointer to output buffer       */
124 #endif
125         if (Target == NULL)
126                 Target = WC->WBuf;
127
128         safestrncpy(charset, supplied_charset, sizeof charset);
129         msg = strdup("");
130         sprintf(new_window, "<a target=\"%s\" href=", TARGET);
131
132         if (Source == NULL) while (serv_getln(buf, sizeof buf), strcmp(buf, "000")) {
133                 line_length = strlen(buf);
134                 buffer_length = content_length + line_length + 2;
135                 ptr = realloc(msg, buffer_length);
136                 if (ptr == NULL) {
137                         StrBufAppendPrintf(Target, "<b>");
138                         StrBufAppendPrintf(Target, _("realloc() error! couldn't get %d bytes: %s"),
139                                         buffer_length + 1,
140                                         strerror(errno));
141                         StrBufAppendPrintf(Target, "</b><br /><br />\n");
142                         while (serv_getln(buf, sizeof buf), strcmp(buf, "000")) {
143                                 /** flush */
144                         }
145                         free(msg);
146                         return;
147                 }
148                 msg = ptr;
149                 strcpy(&msg[content_length], buf);
150                 content_length += line_length;
151                 strcpy(&msg[content_length], "\n");
152                 content_length += 1;
153         }
154         else {
155                 content_length = StrLength(Source);
156                 free(msg);
157                 msg = (char*) ChrPtr(Source);/* TODO: remove cast */
158                 buffer_length = content_length;
159         }
160
161         /** Do a first pass to isolate the message body */
162         ptr = msg + 1;
163         msgstart = msg;
164         msgend = &msg[content_length];
165
166         while (ptr < msgend) {
167
168                 /** Advance to next tag */
169                 ptr = strchr(ptr, '<');
170                 if ((ptr == NULL) || (ptr >= msgend)) break;
171                 ++ptr;
172                 if ((ptr == NULL) || (ptr >= msgend)) break;
173
174                 /*
175                  *  Look for META tags.  Some messages (particularly in
176                  *  Asian locales) illegally declare a message's character
177                  *  set in the HTML instead of in the MIME headers.  This
178                  *  is wrong but we have to work around it anyway.
179                  */
180                 if (!strncasecmp(ptr, "META", 4)) {
181
182                         char *meta_start;
183                         char *meta_end;
184                         int meta_length;
185                         char *meta;
186                         char *meta_http_equiv;
187                         char *meta_content;
188                         char *spaceptr;
189
190                         meta_start = &ptr[4];
191                         meta_end = strchr(ptr, '>');
192                         if ((meta_end != NULL) && (meta_end <= msgend)) {
193                                 meta_length = meta_end - meta_start + 1;
194                                 meta = malloc(meta_length + 1);
195                                 safestrncpy(meta, meta_start, meta_length);
196                                 meta[meta_length] = 0;
197                                 striplt(meta);
198                                 if (!strncasecmp(meta, "HTTP-EQUIV=", 11)) {
199                                         meta_http_equiv = strdup(&meta[11]);
200                                         spaceptr = strchr(meta_http_equiv, ' ');
201                                         if (spaceptr != NULL) {
202                                                 *spaceptr = 0;
203                                                 meta_content = strdup(++spaceptr);
204                                                 if (!strncasecmp(meta_content, "content=", 8)) {
205                                                         strcpy(meta_content, &meta_content[8]);
206                                                         stripquotes(meta_http_equiv);
207                                                         stripquotes(meta_content);
208                                                         extract_charset_from_meta(charset,
209                                                                         meta_http_equiv, meta_content);
210                                                 }
211                                                 free(meta_content);
212                                         }
213                                         free(meta_http_equiv);
214                                 }
215                                 free(meta);
216                         }
217                 }
218
219                 /*
220                  * Any of these tags cause everything up to and including
221                  * the tag to be removed.
222                  */     
223                 if ( (!strncasecmp(ptr, "HTML", 4))
224                                 ||(!strncasecmp(ptr, "HEAD", 4))
225                                 ||(!strncasecmp(ptr, "/HEAD", 5))
226                                 ||(!strncasecmp(ptr, "BODY", 4)) ) {
227                         char *pBody = NULL;
228
229                         if (!strncasecmp(ptr, "BODY", 4)) {
230                                 pBody = ptr;
231                         }
232                         ptr = strchr(ptr, '>');
233                         if ((ptr == NULL) || (ptr >= msgend)) break;
234                         if ((pBody != NULL) && (ptr - pBody > 4)) {
235                                 char* src;
236                                 char *cid_start, *cid_end;
237
238                                 *ptr = '\0';
239                                 pBody += 4; 
240                                 while ((isspace(*pBody)) && (pBody < ptr))
241                                         pBody ++;
242                                 BodyArea = NewStrBufPlain(NULL,  ptr - pBody);
243
244                                 if (pBody < ptr) {
245                                         src = strstr(pBody, "cid:");
246                                         if (src) {
247                                                 cid_start = src + 4;
248                                                 cid_end = cid_start;
249                                                 while ((*cid_end != '"') && 
250                                                                 !isspace(*cid_end) &&
251                                                                 (cid_end < ptr))
252                                                         cid_end ++;
253
254                                                 /* copy tag and attributes up to src="cid: */
255                                                 StrBufAppendBufPlain(BodyArea, pBody, src - pBody, 0);
256
257                                                 /* add in /webcit/mimepart/<msgno>/CID/ 
258                                                    trailing / stops dumb URL filters getting excited */
259                                                 StrBufAppendPrintf(BodyArea,
260                                                                 "/webcit/mimepart/%d/",msgnum);
261                                                 StrBufAppendBufPlain(BodyArea, cid_start, cid_end - cid_start, 0);
262
263                                                 if (ptr - cid_end > 0)
264                                                         StrBufAppendBufPlain(BodyArea, 
265                                                                         cid_end + 1, 
266                                                                         ptr - cid_end, 0);
267                                         }
268                                         else 
269                                                 StrBufAppendBufPlain(BodyArea, pBody, ptr - pBody, 0);
270                                 }
271                                 *ptr = '>';
272                         }
273                         ++ptr;
274                         if ((ptr == NULL) || (ptr >= msgend)) break;
275                         msgstart = ptr;
276                 }
277
278                 /*
279                  * Any of these tags cause everything including and following
280                  * the tag to be removed.
281                  */
282                 if ( (!strncasecmp(ptr, "/HTML", 5))
283                                 ||(!strncasecmp(ptr, "/BODY", 5)) ) {
284                         --ptr;
285                         msgend = ptr;
286                         strcpy(ptr, "");
287
288                 }
289
290                 ++ptr;
291         }
292         if (msgstart > msg) {
293                 strcpy(msg, msgstart);
294         }
295
296         /* Now go through the message, parsing tags as necessary. */
297         converted_msg = NewStrBufPlain(NULL, content_length + 8192);
298
299
300         /** Convert foreign character sets to UTF-8 if necessary. */
301 #ifdef HAVE_ICONV
302         if ( (strcasecmp(charset, "us-ascii"))
303                         && (strcasecmp(charset, "UTF-8"))
304                         && (strcasecmp(charset, ""))
305            ) {
306                 lprintf(9, "Converting %s to UTF-8\n", charset);
307                 ctdl_iconv_open("UTF-8", charset, &ic);
308                 if (ic == (iconv_t)(-1) ) {
309                         lprintf(5, "%s:%d iconv_open() failed: %s\n",
310                                         __FILE__, __LINE__, strerror(errno));
311                 }
312         }
313         if  (Source == NULL) {
314                 if (ic != (iconv_t)(-1) ) {
315                         ibuf = msg;
316                         ibuflen = content_length;
317                         obuflen = content_length + (content_length / 2) ;
318                         obuf = (char *) malloc(obuflen);
319                         osav = obuf;
320                         iconv(ic, &ibuf, &ibuflen, &obuf, &obuflen);
321                         content_length = content_length + (content_length / 2) - obuflen;
322                         osav[content_length] = 0;
323                         free(msg);
324                         msg = osav;
325                         iconv_close(ic);
326                 }
327         }
328         else {
329                 if (ic != (iconv_t)(-1) ) {
330                         StrBuf *Buf = NewStrBufPlain(NULL, StrLength(Source) + 8096);;
331                         StrBufConvert(Source, Buf, &ic);
332                         FreeStrBuf(&Buf);
333                         iconv_close(ic);
334                         msg = (char*)ChrPtr(Source); /* TODO: get rid of this. */
335                 }
336         }
337
338 #endif
339
340         /*
341          *      At this point, the message has been stripped down to
342          *      only the content inside the <BODY></BODY> tags, and has
343          *      been converted to UTF-8 if it was originally in a foreign
344          *      character set.  The text is also guaranteed to be null
345          *      terminated now.
346          */
347
348         if (converted_msg == NULL) {
349                 StrBufAppendPrintf(Target, "Error %d: %s<br />%s:%d", errno, strerror(errno), __FILE__, __LINE__);
350                 goto BAIL;
351         }
352
353         if (BodyArea != NULL) {
354                 StrBufAppendBufPlain(converted_msg, HKEY("<table "), 0);  
355                 StrBufAppendBuf(converted_msg, BodyArea, 0);
356                 StrBufAppendBufPlain(converted_msg, HKEY(" width=\"100%\"><tr><td>"), 0);
357         }
358         ptr = msg;
359         msgend = strchr(msg, 0);
360         while (ptr < msgend) {
361
362                 /** Try to sanitize the html of any rogue scripts */
363                 if (!strncasecmp(ptr, "<script", 7)) {
364                         if (scriptlevel == 0) {
365                                 script_start_pos = StrLength(converted_msg);
366                         }
367                         ++scriptlevel;
368                 }
369                 if (!strncasecmp(ptr, "</script", 8)) {
370                         --scriptlevel;
371                 }
372
373                 /**
374                  * Change mailto: links to WebCit mail, by replacing the
375                  * link with one that points back to our mail room.  Due to
376                  * the way we parse URL's, it'll even handle mailto: links
377                  * that have "?subject=" in them.
378                  */
379                 if (!strncasecmp(ptr, "<a href=\"mailto:", 16)) {
380                         content_length += 64;
381                         StrBufAppendPrintf(converted_msg,
382                                         "<a href=\"display_enter?force_room=_MAIL_?recp=");
383                         ptr = &ptr[16];
384                         ++alevel;
385                         ++brak;
386                 }
387                 /** Make external links open in a separate window */
388                 else if (!strncasecmp(ptr, "<a href=\"", 9)) {
389                         ++alevel;
390                         ++brak;
391                         if ( ((strchr(ptr, ':') < strchr(ptr, '/')))
392                                         &&  ((strchr(ptr, '/') < strchr(ptr, '>'))) 
393                            ) {
394                                 /* open external links to new window */
395                                 StrBufAppendPrintf(converted_msg, new_window);
396                                 ptr = &ptr[8];
397                         }
398                         else if ( (treat_as_wiki) && (strncasecmp(ptr, "<a href=\"wiki?", 14)) ) {
399                                 content_length += 64;
400                                 StrBufAppendPrintf(converted_msg, "<a href=\"wiki?page=");
401                                 ptr = &ptr[9];
402                         }
403                         else {
404                                 StrBufAppendPrintf(converted_msg, "<a href=\"");
405                                 ptr = &ptr[9];
406                         }
407                 }
408                 /** Fixup <img src="cid:... ...> to fetch the mime part */
409                 else if (!strncasecmp(ptr, "<img ", 5)) {
410                         char *cid_start, *cid_end;
411                         char* tag_end=strchr(ptr,'>');
412                         char* src;
413                         /* FIXME - handle this situation (maybe someone opened an <img cid... 
414                          * and then ended the message)
415                          */
416                         if (!tag_end) {
417                                 lprintf(9, "tag_end is null and ptr is:\n");
418                                 lprintf(9, "%s\n", ptr);
419                                 lprintf(9, "Theoretical bytes remaining: %d\n", msgend - ptr);
420                         }
421
422                         src=strstr(ptr, "src=\"cid:");
423                         ++brak;
424
425                         if (src
426                             && isspace(*(src-1))
427                                 && tag_end
428                                 && (cid_start=strchr(src,':'))
429                                 && (cid_end=strchr(cid_start,'"'))
430                                 && (cid_end < tag_end)
431                         ) {
432                                 /* copy tag and attributes up to src="cid: */
433                                 StrBufAppendBufPlain(converted_msg, ptr, src - ptr, 0);
434                                 cid_start++;
435
436                                 /* add in /webcit/mimepart/<msgno>/CID/ 
437                                    trailing / stops dumb URL filters getting excited */
438                                 StrBufAppendPrintf(converted_msg,
439                                                 " src=\"/webcit/mimepart/%d/",msgnum);
440                                 StrBufAppendBufPlain(converted_msg, cid_start, cid_end - cid_start, 0);
441                                 StrBufAppendBufPlain(converted_msg, "/\"", -1, 0);
442
443                                 ptr = cid_end+1;
444                         }
445                         StrBufAppendBufPlain(converted_msg, ptr, tag_end - ptr, 0);
446                         ptr = tag_end;
447                 }
448
449                 /**
450                  * Turn anything that looks like a URL into a real link, as long
451                  * as it's not inside a tag already
452                  */
453                 else if ( (brak == 0) && (alevel == 0)
454                      && (!strncasecmp(ptr, "http://", 7))) {
455                                 /** Find the end of the link */
456                                 int strlenptr;
457                                 linklen = 0;
458                                 
459                                 strlenptr = strlen(ptr);
460                                 for (i=0; i<=strlenptr; ++i) {
461                                         if ((ptr[i]==0)
462                                            ||(isspace(ptr[i]))
463                                            ||(ptr[i]==10)
464                                            ||(ptr[i]==13)
465                                            ||(ptr[i]=='(')
466                                            ||(ptr[i]==')')
467                                            ||(ptr[i]=='<')
468                                            ||(ptr[i]=='>')
469                                            ||(ptr[i]=='[')
470                                            ||(ptr[i]==']')
471                                            ||(ptr[i]=='"')
472                                            ||(ptr[i]=='\'')
473                                         ) linklen = i;
474                                         /* did s.b. send us an entity? */
475                                         if (ptr[i] == '&') {
476                                                 if ((ptr[i+2] ==';') ||
477                                                     (ptr[i+3] ==';') ||
478                                                     (ptr[i+5] ==';') ||
479                                                     (ptr[i+6] ==';') ||
480                                                     (ptr[i+7] ==';'))
481                                                         linklen = i;
482                                         }
483                                         if (linklen > 0) break;
484                                 }
485                                 if (linklen > 0) {
486                                         char *ltreviewptr;
487                                         char *nbspreviewptr;
488                                         char linkedchar;
489                                         int len;
490                                         
491                                         len = linklen;
492                                         linkedchar = ptr[len];
493                                         ptr[len] = '\0';
494                                         /* spot for some subject strings tinymce tends to give us. */
495                                         ltreviewptr = strchr(ptr, '<');
496                                         if (ltreviewptr != NULL) {
497                                                 *ltreviewptr = '\0';
498                                                 linklen = ltreviewptr - ptr;
499                                         }
500
501                                         nbspreviewptr = strstr(ptr, "&nbsp;");
502                                         if (nbspreviewptr != NULL) {
503                                                 /* nbspreviewptr = '\0'; */
504                                                 linklen = nbspreviewptr - ptr;
505                                         }
506                                         if (ltreviewptr != 0)
507                                                 *ltreviewptr = '<';
508
509                                         ptr[len] = linkedchar;
510
511                                         content_length += (32 + linklen);
512                                         StrBufAppendPrintf(converted_msg, "%s\"", new_window);
513                                         StrBufAppendBufPlain(converted_msg, ptr, linklen, 0);
514                                         StrBufAppendPrintf(converted_msg, "\">");
515                                         StrBufAppendBufPlain(converted_msg, ptr, linklen, 0);
516                                         ptr += linklen;
517                                         StrBufAppendPrintf(converted_msg, "</A>");
518                                 }
519                 }
520                 else {
521                         StrBufAppendBufPlain(converted_msg, ptr, 1, 0);
522                         ptr++;
523                 }
524
525
526                 if ((ptr >= msg) && (ptr <= msgend)) {
527                         /*
528                          * We need to know when we're inside a tag,
529                          * so we don't turn things that look like URL's into
530                          * links, when they're already links - or image sources.
531                          */
532                         if ((ptr > msg) && (*(ptr-1) == '<')) {
533                                 ++brak;
534                         }
535                         if ((ptr > msg) && (*(ptr-1) == '>')) {
536                                 --brak;
537                                 if ((scriptlevel == 0) && (script_start_pos >= 0)) {
538                                         StrBufCutRight(converted_msg, StrLength(converted_msg) - script_start_pos);
539                                         script_start_pos = (-1);
540                                 }
541                         }
542                         if (!strncasecmp(ptr, "</A>", 3)) --alevel;
543                 }
544         }
545
546         if (BodyArea != NULL) {
547                 StrBufAppendBufPlain(converted_msg, HKEY("</td></tr></table>"), 0);  
548                 FreeStrBuf(&BodyArea);
549         }
550
551         /**     uncomment these two lines to override conversion        */
552         /**     memcpy(converted_msg, msg, content_length);             */
553         /**     output_length = content_length;                         */
554
555         /** Output our big pile of markup */
556         StrBufAppendBuf(Target, converted_msg, 0);
557
558 BAIL:   /** A little trailing vertical whitespace... */
559         StrBufAppendPrintf(Target, "<br /><br />\n");
560
561         /** Now give back the memory */
562         FreeStrBuf(&converted_msg);
563         if ((msg != NULL) && (Source == NULL)) free(msg);
564 }
565
566
567
568
569
570
571 /*
572  * Look for URL's embedded in a buffer and make them linkable.  We use a
573  * target window in order to keep the Citadel session in its own window.
574  */
575 void UrlizeText(StrBuf* Target, StrBuf *Source, StrBuf *WrkBuf)
576 {
577         int len, UrlLen, Offset, TrailerLen;
578         const char *start, *end, *pos;
579         
580         FlushStrBuf(Target);
581
582         start = NULL;
583         len = StrLength(Source);
584         end = ChrPtr(Source) + len;
585         for (pos = ChrPtr(Source); (pos < end) && (start == NULL); ++pos) {
586                 if (!strncasecmp(pos, "http://", 7))
587                         start = pos;
588                 else if (!strncasecmp(pos, "ftp://", 6))
589                         start = pos;
590         }
591
592         if (start == NULL) {
593                 StrBufAppendBuf(Target, Source, 0);
594                 return;
595         }
596         FlushStrBuf(WrkBuf);
597
598         for (pos = ChrPtr(Source) + len; pos > start; --pos) {
599                 if (  (!isprint(*pos))
600                    || (isspace(*pos))
601                    || (*pos == '{')
602                    || (*pos == '}')
603                    || (*pos == '|')
604                    || (*pos == '\\')
605                    || (*pos == '^')
606                    || (*pos == '[')
607                    || (*pos == ']')
608                    || (*pos == '`')
609                    || (*pos == '<')
610                    || (*pos == '>')
611                    || (*pos == '(')
612                    || (*pos == ')')
613                 ) {
614                         end = pos;
615                 }
616         }
617         
618         UrlLen = end - start;
619         StrBufAppendBufPlain(WrkBuf, start, UrlLen, 0);
620
621         Offset = start - ChrPtr(Source);
622         if (Offset != 0)
623                 StrBufAppendBufPlain(Target, ChrPtr(Source), Offset, 0);
624         StrBufAppendPrintf(Target, "%ca href=%c%s%c TARGET=%c%s%c%c%s%c/A%c",
625                            LB, QU, ChrPtr(WrkBuf), QU, QU, TARGET, 
626                            QU, RB, ChrPtr(WrkBuf), LB, RB);
627
628         TrailerLen = StrLength(Source) - (end - ChrPtr(Source));
629         if (TrailerLen > 0)
630                 StrBufAppendBufPlain(Target, end, TrailerLen, 0);
631 }
632
633
634 void url(char *buf, size_t bufsize)
635 {
636         int len, UrlLen, Offset, TrailerLen, outpos;
637         char *start, *end, *pos;
638         char urlbuf[SIZ];
639         char outbuf[SIZ];
640
641         start = NULL;
642         len = strlen(buf);
643         if (len > bufsize) {
644                 lprintf(1, "URL: content longer than buffer!");
645                 return;
646         }
647         end = buf + len;
648         for (pos = buf; (pos < end) && (start == NULL); ++pos) {
649                 if (!strncasecmp(pos, "http://", 7))
650                         start = pos;
651                 if (!strncasecmp(pos, "ftp://", 6))
652                         start = pos;
653         }
654
655         if (start == NULL)
656                 return;
657
658         for (pos = buf+len; pos > start; --pos) {
659                 if (  (!isprint(*pos))
660                    || (isspace(*pos))
661                    || (*pos == '{')
662                    || (*pos == '}')
663                    || (*pos == '|')
664                    || (*pos == '\\')
665                    || (*pos == '^')
666                    || (*pos == '[')
667                    || (*pos == ']')
668                    || (*pos == '`')
669                    || (*pos == '<')
670                    || (*pos == '>')
671                    || (*pos == '(')
672                    || (*pos == ')')
673                 ) {
674                         end = pos;
675                 }
676         }
677         
678         UrlLen = end - start;
679         if (UrlLen > sizeof(urlbuf)){
680                 lprintf(1, "URL: content longer than buffer!");
681                 return;
682         }
683         memcpy(urlbuf, start, UrlLen);
684         urlbuf[UrlLen] = '\0';
685
686         Offset = start - buf;
687         if ((Offset != 0) && (Offset < sizeof(outbuf)))
688                 memcpy(outbuf, buf, Offset);
689         outpos = snprintf(&outbuf[Offset], sizeof(outbuf) - Offset,  
690                           "%ca href=%c%s%c TARGET=%c%s%c%c%s%c/A%c",
691                           LB, QU, urlbuf, QU, QU, TARGET, QU, RB, urlbuf, LB, RB);
692         if (outpos >= sizeof(outbuf) - Offset) {
693                 lprintf(1, "URL: content longer than buffer!");
694                 return;
695         }
696
697         TrailerLen = len - (end - start);
698         if (TrailerLen > 0)
699                 memcpy(outbuf + Offset + outpos, end, TrailerLen);
700         if (Offset + outpos + TrailerLen > bufsize) {
701                 lprintf(1, "URL: content longer than buffer!");
702                 return;
703         }
704         memcpy (buf, outbuf, Offset + outpos + TrailerLen);
705         *(buf + Offset + outpos + TrailerLen) = '\0';
706 }
707