No more robots.txt, we now welcome spiders
[citadel.git] / webcit / html2html.c
1 /*
2  * Output an HTML message, modifying it slightly to make sure it plays nice
3  * with the rest of our web framework.
4  *
5  * Copyright (c) 2005-2010 by the citadel.org team
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
20  */
21
22 #include "webcit.h"
23 #include "webserver.h"
24
25
/*
 * Strip one pair of surrounding quotes from a string, in place.
 *
 * s	NUL-terminated string to modify (NULL is tolerated and ignored)
 *
 * The string is changed only when it is at least two characters long and
 * its first and last characters are the same quote character (either both
 * '"' or both '\'').  Mismatched or unbalanced quotes are left alone.
 */
void stripquotes(char *s)
{
	size_t len;

	if (s == NULL) return;

	len = strlen(s);
	if (len < 2) return;

	if ( ((s[0] == '\"') || (s[0] == '\'')) && (s[len-1] == s[0]) ) {
		/*
		 * Source and destination overlap, so this has to be memmove();
		 * strcpy() on overlapping objects is undefined behaviour.
		 */
		memmove(s, &s[1], len - 2);
		s[len-2] = 0;
	}
}
43
44
/*
 * Check to see if a META tag has overridden the declared MIME character set.
 *
 * charset		Character set name (left unchanged if we don't do anything).
 *			At most 55 characters plus the terminator are written,
 *			because the value passes through a 64 byte scratch buffer
 *			and the 8 byte "charset=" prefix is dropped; the caller's
 *			buffer must be at least that large.
 * meta_http_equiv	Content of the "http-equiv" portion of the META tag
 * meta_content		Content of the "content" portion of the META tag
 *
 * Nothing happens unless http-equiv is "Content-type" (case insensitive)
 * and the text after the first ';' in the content starts with "charset=".
 */
void extract_charset_from_meta(char *charset, char *meta_http_equiv, char *meta_content)
{
	char *ptr;
	char buf[64];

	if (!charset) return;
	if (!meta_http_equiv) return;
	if (!meta_content) return;

	if (strcasecmp(meta_http_equiv, "Content-type")) return;

	ptr = strchr(meta_content, ';');
	if (!ptr) return;

	/* NOTE(review): striplt() presumably trims surrounding whitespace -- confirm */
	safestrncpy(buf, ++ptr, sizeof buf);
	striplt(buf);
	if (strncasecmp(buf, "charset=", 8)) return;

	strcpy(charset, &buf[8]);

	/*
	 * Remove wandering punctuation BEFORE looking at the name.  When the
	 * caller could not strip the attribute quotes (for instance an XHTML
	 * style <meta ... />), the value arrives as something like
	 * unicode" / and would otherwise never match the alias check below.
	 */
	if ((ptr = strchr(charset, '\"'))) *ptr = 0;
	striplt(charset);

	/*
	 * The brain-damaged webmail program in Microsoft Exchange declares
	 * a charset of "unicode" when they really mean "UTF-8".  GNU iconv
	 * treats "unicode" as an alias for "UTF-16" so we have to manually
	 * fix this here, otherwise messages generated in Exchange webmail
	 * show up as a big pile of weird characters.
	 */
	if (!strcasecmp(charset, "unicode")) {
		strcpy(charset, "UTF-8");
	}
}
88
89
90
91 /*
92  * Sanitize and enhance an HTML message for display.
93  * Also convert weird character sets to UTF-8 if necessary.
94  * Also fixup img src="cid:..." type inline images to fetch the image
95  *
96  */
97 void output_html(const char *supplied_charset, int treat_as_wiki, int msgnum, StrBuf *Source, StrBuf *Target) {
98         char buf[SIZ];
99         char *msg;
100         char *ptr;
101         char *msgstart;
102         char *msgend;
103         StrBuf *converted_msg;
104         int buffer_length = 1;
105         int line_length = 0;
106         int content_length = 0;
107         char new_window[SIZ];
108         int brak = 0;
109         int alevel = 0;
110         int scriptlevel = 0;
111         int script_start_pos = (-1);
112         int i;
113         int linklen;
114         char charset[128];
115         StrBuf *BodyArea = NULL;
116 #ifdef HAVE_ICONV
117         iconv_t ic = (iconv_t)(-1) ;
118         char *ibuf;                   /* Buffer of characters to be converted */
119         char *obuf;                   /* Buffer for converted characters      */
120         size_t ibuflen;               /* Length of input buffer               */
121         size_t obuflen;               /* Length of output buffer              */
122         char *osav;                   /* Saved pointer to output buffer       */
123 #endif
124         if (Target == NULL)
125                 Target = WC->WBuf;
126
127         safestrncpy(charset, supplied_charset, sizeof charset);
128         msg = strdup("");
129         sprintf(new_window, "<a target=\"%s\" href=", TARGET);
130
131         if (Source == NULL) while (serv_getln(buf, sizeof buf), strcmp(buf, "000")) {
132                 line_length = strlen(buf);
133                 buffer_length = content_length + line_length + 2;
134                 ptr = realloc(msg, buffer_length);
135                 if (ptr == NULL) {
136                         StrBufAppendPrintf(Target, "<b>");
137                         StrBufAppendPrintf(Target, _("realloc() error! couldn't get %d bytes: %s"),
138                                         buffer_length + 1,
139                                         strerror(errno));
140                         StrBufAppendPrintf(Target, "</b><br /><br />\n");
141                         while (serv_getln(buf, sizeof buf), strcmp(buf, "000")) {
142                                 /** flush */
143                         }
144                         free(msg);
145                         return;
146                 }
147                 msg = ptr;
148                 strcpy(&msg[content_length], buf);
149                 content_length += line_length;
150                 strcpy(&msg[content_length], "\n");
151                 content_length += 1;
152         }
153         else {
154                 content_length = StrLength(Source);
155                 free(msg);
156                 msg = (char*) ChrPtr(Source);/* TODO: remove cast */
157                 buffer_length = content_length;
158         }
159
160         /** Do a first pass to isolate the message body */
161         ptr = msg + 1;
162         msgstart = msg;
163         msgend = &msg[content_length];
164
165         while (ptr < msgend) {
166
167                 /** Advance to next tag */
168                 ptr = strchr(ptr, '<');
169                 if ((ptr == NULL) || (ptr >= msgend)) break;
170                 ++ptr;
171                 if ((ptr == NULL) || (ptr >= msgend)) break;
172
173                 /*
174                  *  Look for META tags.  Some messages (particularly in
175                  *  Asian locales) illegally declare a message's character
176                  *  set in the HTML instead of in the MIME headers.  This
177                  *  is wrong but we have to work around it anyway.
178                  */
179                 if (!strncasecmp(ptr, "META", 4)) {
180
181                         char *meta_start;
182                         char *meta_end;
183                         int meta_length;
184                         char *meta;
185                         char *meta_http_equiv;
186                         char *meta_content;
187                         char *spaceptr;
188
189                         meta_start = &ptr[4];
190                         meta_end = strchr(ptr, '>');
191                         if ((meta_end != NULL) && (meta_end <= msgend)) {
192                                 meta_length = meta_end - meta_start + 1;
193                                 meta = malloc(meta_length + 1);
194                                 safestrncpy(meta, meta_start, meta_length);
195                                 meta[meta_length] = 0;
196                                 striplt(meta);
197                                 if (!strncasecmp(meta, "HTTP-EQUIV=", 11)) {
198                                         meta_http_equiv = strdup(&meta[11]);
199                                         spaceptr = strchr(meta_http_equiv, ' ');
200                                         if (spaceptr != NULL) {
201                                                 *spaceptr = 0;
202                                                 meta_content = strdup(++spaceptr);
203                                                 if (!strncasecmp(meta_content, "content=", 8)) {
204                                                         strcpy(meta_content, &meta_content[8]);
205                                                         stripquotes(meta_http_equiv);
206                                                         stripquotes(meta_content);
207                                                         extract_charset_from_meta(charset,
208                                                                         meta_http_equiv, meta_content);
209                                                 }
210                                                 free(meta_content);
211                                         }
212                                         free(meta_http_equiv);
213                                 }
214                                 free(meta);
215                         }
216                 }
217
218                 /*
219                  * Any of these tags cause everything up to and including
220                  * the tag to be removed.
221                  */     
222                 if ( (!strncasecmp(ptr, "HTML", 4))
223                                 ||(!strncasecmp(ptr, "HEAD", 4))
224                                 ||(!strncasecmp(ptr, "/HEAD", 5))
225                                 ||(!strncasecmp(ptr, "BODY", 4)) ) {
226                         char *pBody = NULL;
227
228                         if (!strncasecmp(ptr, "BODY", 4)) {
229                                 pBody = ptr;
230                         }
231                         ptr = strchr(ptr, '>');
232                         if ((ptr == NULL) || (ptr >= msgend)) break;
233                         if ((pBody != NULL) && (ptr - pBody > 4)) {
234                                 char* src;
235                                 char *cid_start, *cid_end;
236
237                                 *ptr = '\0';
238                                 pBody += 4; 
239                                 while ((isspace(*pBody)) && (pBody < ptr))
240                                         pBody ++;
241                                 BodyArea = NewStrBufPlain(NULL,  ptr - pBody);
242
243                                 if (pBody < ptr) {
244                                         src = strstr(pBody, "cid:");
245                                         if (src) {
246                                                 cid_start = src + 4;
247                                                 cid_end = cid_start;
248                                                 while ((*cid_end != '"') && 
249                                                                 !isspace(*cid_end) &&
250                                                                 (cid_end < ptr))
251                                                         cid_end ++;
252
253                                                 /* copy tag and attributes up to src="cid: */
254                                                 StrBufAppendBufPlain(BodyArea, pBody, src - pBody, 0);
255
256                                                 /* add in /webcit/mimepart/<msgno>/CID/ 
257                                                    trailing / stops dumb URL filters getting excited */
258                                                 StrBufAppendPrintf(BodyArea,
259                                                                 "/webcit/mimepart/%d/",msgnum);
260                                                 StrBufAppendBufPlain(BodyArea, cid_start, cid_end - cid_start, 0);
261
262                                                 if (ptr - cid_end > 0)
263                                                         StrBufAppendBufPlain(BodyArea, 
264                                                                         cid_end + 1, 
265                                                                         ptr - cid_end, 0);
266                                         }
267                                         else 
268                                                 StrBufAppendBufPlain(BodyArea, pBody, ptr - pBody, 0);
269                                 }
270                                 *ptr = '>';
271                         }
272                         ++ptr;
273                         if ((ptr == NULL) || (ptr >= msgend)) break;
274                         msgstart = ptr;
275                 }
276
277                 /*
278                  * Any of these tags cause everything including and following
279                  * the tag to be removed.
280                  */
281                 if ( (!strncasecmp(ptr, "/HTML", 5))
282                                 ||(!strncasecmp(ptr, "/BODY", 5)) ) {
283                         --ptr;
284                         msgend = ptr;
285                         strcpy(ptr, "");
286
287                 }
288
289                 ++ptr;
290         }
291         if (msgstart > msg) {
292                 strcpy(msg, msgstart);
293         }
294
295         /* Now go through the message, parsing tags as necessary. */
296         converted_msg = NewStrBufPlain(NULL, content_length + 8192);
297
298
299         /** Convert foreign character sets to UTF-8 if necessary. */
300 #ifdef HAVE_ICONV
301         if ( (strcasecmp(charset, "us-ascii"))
302                         && (strcasecmp(charset, "UTF-8"))
303                         && (strcasecmp(charset, ""))
304            ) {
305                 lprintf(9, "Converting %s to UTF-8\n", charset);
306                 ctdl_iconv_open("UTF-8", charset, &ic);
307                 if (ic == (iconv_t)(-1) ) {
308                         lprintf(5, "%s:%d iconv_open() failed: %s\n",
309                                         __FILE__, __LINE__, strerror(errno));
310                 }
311         }
312         if  (Source == NULL) {
313                 if (ic != (iconv_t)(-1) ) {
314                         ibuf = msg;
315                         ibuflen = content_length;
316                         obuflen = content_length + (content_length / 2) ;
317                         obuf = (char *) malloc(obuflen);
318                         osav = obuf;
319                         iconv(ic, &ibuf, &ibuflen, &obuf, &obuflen);
320                         content_length = content_length + (content_length / 2) - obuflen;
321                         osav[content_length] = 0;
322                         free(msg);
323                         msg = osav;
324                         iconv_close(ic);
325                 }
326         }
327         else {
328                 if (ic != (iconv_t)(-1) ) {
329                         StrBuf *Buf = NewStrBufPlain(NULL, StrLength(Source) + 8096);;
330                         StrBufConvert(Source, Buf, &ic);
331                         FreeStrBuf(&Buf);
332                         iconv_close(ic);
333                         msg = (char*)ChrPtr(Source); /* TODO: get rid of this. */
334                 }
335         }
336
337 #endif
338
339         /*
340          *      At this point, the message has been stripped down to
341          *      only the content inside the <BODY></BODY> tags, and has
342          *      been converted to UTF-8 if it was originally in a foreign
343          *      character set.  The text is also guaranteed to be null
344          *      terminated now.
345          */
346
347         if (converted_msg == NULL) {
348                 StrBufAppendPrintf(Target, "Error %d: %s<br />%s:%d", errno, strerror(errno), __FILE__, __LINE__);
349                 goto BAIL;
350         }
351
352         if (BodyArea != NULL) {
353                 StrBufAppendBufPlain(converted_msg, HKEY("<table "), 0);  
354                 StrBufAppendBuf(converted_msg, BodyArea, 0);
355                 StrBufAppendBufPlain(converted_msg, HKEY(" width=\"100%\"><tr><td>"), 0);
356         }
357         ptr = msg;
358         msgend = strchr(msg, 0);
359         while (ptr < msgend) {
360
361                 /** Try to sanitize the html of any rogue scripts */
362                 if (!strncasecmp(ptr, "<script", 7)) {
363                         if (scriptlevel == 0) {
364                                 script_start_pos = StrLength(converted_msg);
365                         }
366                         ++scriptlevel;
367                 }
368                 if (!strncasecmp(ptr, "</script", 8)) {
369                         --scriptlevel;
370                 }
371
372                 /**
373                  * Change mailto: links to WebCit mail, by replacing the
374                  * link with one that points back to our mail room.  Due to
375                  * the way we parse URL's, it'll even handle mailto: links
376                  * that have "?subject=" in them.
377                  */
378                 if (!strncasecmp(ptr, "<a href=\"mailto:", 16)) {
379                         content_length += 64;
380                         StrBufAppendPrintf(converted_msg,
381                                         "<a href=\"display_enter?force_room=_MAIL_?recp=");
382                         ptr = &ptr[16];
383                         ++alevel;
384                         ++brak;
385                 }
386                 /** Make external links open in a separate window */
387                 else if (!strncasecmp(ptr, "<a href=\"", 9)) {
388                         ++alevel;
389                         ++brak;
390                         if ( ((strchr(ptr, ':') < strchr(ptr, '/')))
391                                         &&  ((strchr(ptr, '/') < strchr(ptr, '>'))) 
392                            ) {
393                                 /* open external links to new window */
394                                 StrBufAppendPrintf(converted_msg, new_window);
395                                 ptr = &ptr[8];
396                         }
397                         else if ( (treat_as_wiki) && (strncasecmp(ptr, "<a href=\"wiki?", 14)) ) {
398                                 content_length += 64;
399                                 StrBufAppendPrintf(converted_msg, "<a href=\"wiki?page=");
400                                 ptr = &ptr[9];
401                         }
402                         else {
403                                 StrBufAppendPrintf(converted_msg, "<a href=\"");
404                                 ptr = &ptr[9];
405                         }
406                 }
407                 /** Fixup <img src="cid:... ...> to fetch the mime part */
408                 else if (!strncasecmp(ptr, "<img ", 5)) {
409                         char *cid_start, *cid_end;
410                         char* tag_end=strchr(ptr,'>');
411                         char* src;
412                         /* FIXME - handle this situation (maybe someone opened an <img cid... 
413                          * and then ended the message)
414                          */
415                         if (!tag_end) {
416                                 lprintf(9, "tag_end is null and ptr is:\n");
417                                 lprintf(9, "%s\n", ptr);
418                                 lprintf(9, "Theoretical bytes remaining: %d\n", msgend - ptr);
419                         }
420
421                         src=strstr(ptr, "src=\"cid:");
422                         ++brak;
423
424                         if (src
425                             && isspace(*(src-1))
426                                 && tag_end
427                                 && (cid_start=strchr(src,':'))
428                                 && (cid_end=strchr(cid_start,'"'))
429                                 && (cid_end < tag_end)
430                         ) {
431                                 /* copy tag and attributes up to src="cid: */
432                                 StrBufAppendBufPlain(converted_msg, ptr, src - ptr, 0);
433                                 cid_start++;
434
435                                 /* add in /webcit/mimepart/<msgno>/CID/ 
436                                    trailing / stops dumb URL filters getting excited */
437                                 StrBufAppendPrintf(converted_msg,
438                                                 " src=\"/webcit/mimepart/%d/",msgnum);
439                                 StrBufAppendBufPlain(converted_msg, cid_start, cid_end - cid_start, 0);
440                                 StrBufAppendBufPlain(converted_msg, "/\"", -1, 0);
441
442                                 ptr = cid_end+1;
443                         }
444                         StrBufAppendBufPlain(converted_msg, ptr, tag_end - ptr, 0);
445                         ptr = tag_end;
446                 }
447
448                 /**
449                  * Turn anything that looks like a URL into a real link, as long
450                  * as it's not inside a tag already
451                  */
452                 else if ( (brak == 0) && (alevel == 0)
453                      && (!strncasecmp(ptr, "http://", 7))) {
454                                 /** Find the end of the link */
455                                 int strlenptr;
456                                 linklen = 0;
457                                 
458                                 strlenptr = strlen(ptr);
459                                 for (i=0; i<=strlenptr; ++i) {
460                                         if ((ptr[i]==0)
461                                            ||(isspace(ptr[i]))
462                                            ||(ptr[i]==10)
463                                            ||(ptr[i]==13)
464                                            ||(ptr[i]=='(')
465                                            ||(ptr[i]==')')
466                                            ||(ptr[i]=='<')
467                                            ||(ptr[i]=='>')
468                                            ||(ptr[i]=='[')
469                                            ||(ptr[i]==']')
470                                            ||(ptr[i]=='"')
471                                            ||(ptr[i]=='\'')
472                                         ) linklen = i;
473                                         /* did s.b. send us an entity? */
474                                         if (ptr[i] == '&') {
475                                                 if ((ptr[i+2] ==';') ||
476                                                     (ptr[i+3] ==';') ||
477                                                     (ptr[i+5] ==';') ||
478                                                     (ptr[i+6] ==';') ||
479                                                     (ptr[i+7] ==';'))
480                                                         linklen = i;
481                                         }
482                                         if (linklen > 0) break;
483                                 }
484                                 if (linklen > 0) {
485                                         char *ltreviewptr;
486                                         char *nbspreviewptr;
487                                         char linkedchar;
488                                         int len;
489                                         
490                                         len = linklen;
491                                         linkedchar = ptr[len];
492                                         ptr[len] = '\0';
493                                         /* spot for some subject strings tinymce tends to give us. */
494                                         ltreviewptr = strchr(ptr, '<');
495                                         if (ltreviewptr != NULL) {
496                                                 *ltreviewptr = '\0';
497                                                 linklen = ltreviewptr - ptr;
498                                         }
499
500                                         nbspreviewptr = strstr(ptr, "&nbsp;");
501                                         if (nbspreviewptr != NULL) {
502                                                 /* nbspreviewptr = '\0'; */
503                                                 linklen = nbspreviewptr - ptr;
504                                         }
505                                         if (ltreviewptr != 0)
506                                                 *ltreviewptr = '<';
507
508                                         ptr[len] = linkedchar;
509
510                                         content_length += (32 + linklen);
511                                         StrBufAppendPrintf(converted_msg, "%s\"", new_window);
512                                         StrBufAppendBufPlain(converted_msg, ptr, linklen, 0);
513                                         StrBufAppendPrintf(converted_msg, "\">");
514                                         StrBufAppendBufPlain(converted_msg, ptr, linklen, 0);
515                                         ptr += linklen;
516                                         StrBufAppendPrintf(converted_msg, "</A>");
517                                 }
518                 }
519                 else {
520                         StrBufAppendBufPlain(converted_msg, ptr, 1, 0);
521                         ptr++;
522                 }
523
524
525                 if ((ptr >= msg) && (ptr <= msgend)) {
526                         /*
527                          * We need to know when we're inside a tag,
528                          * so we don't turn things that look like URL's into
529                          * links, when they're already links - or image sources.
530                          */
531                         if ((ptr > msg) && (*(ptr-1) == '<')) {
532                                 ++brak;
533                         }
534                         if ((ptr > msg) && (*(ptr-1) == '>')) {
535                                 --brak;
536                                 if ((scriptlevel == 0) && (script_start_pos >= 0)) {
537                                         StrBufCutRight(converted_msg, StrLength(converted_msg) - script_start_pos);
538                                         script_start_pos = (-1);
539                                 }
540                         }
541                         if (!strncasecmp(ptr, "</A>", 3)) --alevel;
542                 }
543         }
544
545         if (BodyArea != NULL) {
546                 StrBufAppendBufPlain(converted_msg, HKEY("</td></tr></table>"), 0);  
547                 FreeStrBuf(&BodyArea);
548         }
549
550         /**     uncomment these two lines to override conversion        */
551         /**     memcpy(converted_msg, msg, content_length);             */
552         /**     output_length = content_length;                         */
553
554         /** Output our big pile of markup */
555         StrBufAppendBuf(Target, converted_msg, 0);
556
557 BAIL:   /** A little trailing vertical whitespace... */
558         StrBufAppendPrintf(Target, "<br /><br />\n");
559
560         /** Now give back the memory */
561         FreeStrBuf(&converted_msg);
562         if ((msg != NULL) && (Source == NULL)) free(msg);
563 }
564
565
566
567
568
569
570 /*
571  * Look for URL's embedded in a buffer and make them linkable.  We use a
572  * target window in order to keep the Citadel session in its own window.
573  */
574 void UrlizeText(StrBuf* Target, StrBuf *Source, StrBuf *WrkBuf)
575 {
576         int len, UrlLen, Offset, TrailerLen;
577         const char *start, *end, *pos;
578         
579         FlushStrBuf(Target);
580
581         start = NULL;
582         len = StrLength(Source);
583         end = ChrPtr(Source) + len;
584         for (pos = ChrPtr(Source); (pos < end) && (start == NULL); ++pos) {
585                 if (!strncasecmp(pos, "http://", 7))
586                         start = pos;
587                 else if (!strncasecmp(pos, "ftp://", 6))
588                         start = pos;
589         }
590
591         if (start == NULL) {
592                 StrBufAppendBuf(Target, Source, 0);
593                 return;
594         }
595         FlushStrBuf(WrkBuf);
596
597         for (pos = ChrPtr(Source) + len; pos > start; --pos) {
598                 if (  (!isprint(*pos))
599                    || (isspace(*pos))
600                    || (*pos == '{')
601                    || (*pos == '}')
602                    || (*pos == '|')
603                    || (*pos == '\\')
604                    || (*pos == '^')
605                    || (*pos == '[')
606                    || (*pos == ']')
607                    || (*pos == '`')
608                    || (*pos == '<')
609                    || (*pos == '>')
610                    || (*pos == '(')
611                    || (*pos == ')')
612                 ) {
613                         end = pos;
614                 }
615         }
616         
617         UrlLen = end - start;
618         StrBufAppendBufPlain(WrkBuf, start, UrlLen, 0);
619
620         Offset = start - ChrPtr(Source);
621         if (Offset != 0)
622                 StrBufAppendBufPlain(Target, ChrPtr(Source), Offset, 0);
623         StrBufAppendPrintf(Target, "%ca href=%c%s%c TARGET=%c%s%c%c%s%c/A%c",
624                            LB, QU, ChrPtr(WrkBuf), QU, QU, TARGET, 
625                            QU, RB, ChrPtr(WrkBuf), LB, RB);
626
627         TrailerLen = StrLength(Source) - (end - ChrPtr(Source));
628         if (TrailerLen > 0)
629                 StrBufAppendBufPlain(Target, end, TrailerLen, 0);
630 }
631
632
633 void url(char *buf, size_t bufsize)
634 {
635         int len, UrlLen, Offset, TrailerLen, outpos;
636         char *start, *end, *pos;
637         char urlbuf[SIZ];
638         char outbuf[SIZ];
639
640         start = NULL;
641         len = strlen(buf);
642         if (len > bufsize) {
643                 lprintf(1, "URL: content longer than buffer!");
644                 return;
645         }
646         end = buf + len;
647         for (pos = buf; (pos < end) && (start == NULL); ++pos) {
648                 if (!strncasecmp(pos, "http://", 7))
649                         start = pos;
650                 if (!strncasecmp(pos, "ftp://", 6))
651                         start = pos;
652         }
653
654         if (start == NULL)
655                 return;
656
657         for (pos = buf+len; pos > start; --pos) {
658                 if (  (!isprint(*pos))
659                    || (isspace(*pos))
660                    || (*pos == '{')
661                    || (*pos == '}')
662                    || (*pos == '|')
663                    || (*pos == '\\')
664                    || (*pos == '^')
665                    || (*pos == '[')
666                    || (*pos == ']')
667                    || (*pos == '`')
668                    || (*pos == '<')
669                    || (*pos == '>')
670                    || (*pos == '(')
671                    || (*pos == ')')
672                 ) {
673                         end = pos;
674                 }
675         }
676         
677         UrlLen = end - start;
678         if (UrlLen > sizeof(urlbuf)){
679                 lprintf(1, "URL: content longer than buffer!");
680                 return;
681         }
682         memcpy(urlbuf, start, UrlLen);
683         urlbuf[UrlLen] = '\0';
684
685         Offset = start - buf;
686         if ((Offset != 0) && (Offset < sizeof(outbuf)))
687                 memcpy(outbuf, buf, Offset);
688         outpos = snprintf(&outbuf[Offset], sizeof(outbuf) - Offset,  
689                           "%ca href=%c%s%c TARGET=%c%s%c%c%s%c/A%c",
690                           LB, QU, urlbuf, QU, QU, TARGET, QU, RB, urlbuf, LB, RB);
691         if (outpos >= sizeof(outbuf) - Offset) {
692                 lprintf(1, "URL: content longer than buffer!");
693                 return;
694         }
695
696         TrailerLen = len - (end - start);
697         if (TrailerLen > 0)
698                 memcpy(outbuf + Offset + outpos, end, TrailerLen);
699         if (Offset + outpos + TrailerLen > bufsize) {
700                 lprintf(1, "URL: content longer than buffer!");
701                 return;
702         }
703         memcpy (buf, outbuf, Offset + outpos + TrailerLen);
704         *(buf + Offset + outpos + TrailerLen) = '\0';
705 }
706