3 * Copyright (c) 1998-2009 by the citadel.org team
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
29 #include <sys/types.h>
32 #if TIME_WITH_SYS_TIME
33 # include <sys/time.h>
37 # include <sys/time.h>
46 #include <sys/socket.h>
47 #include <netinet/in.h>
48 #include <arpa/inet.h>
51 #include <libcitadel.h>
54 #include "citserver.h"
61 #include "internet_addressing.h"
64 #include "ctdl_module.h"
65 #include "locate_host.h"
66 #include "citadel_dirs.h"
68 #include "event_client.h"
/*
 * Module-wide table of URL-shortener prefixes. It is created and filled by
 * LoadUrlShorteners(), walked by CrawlMessageForShorterUrls() and released
 * by shorter_url_cleanup().
 */
70 HashList *UrlShorteners = NULL;
/*
 * Header callback that LookupUrl() installs with OPT(HEADERFUNCTION, ...).
 * If the header data in ptr starts with "location" (compared without regard
 * to case), the trimmed remainder of the line is stored in the StrBuf that
 * was registered with OPT(WRITEHEADER, ...).
 *
 * ptr       start of the header data; the code treats it as size * nmemb bytes
 * userdata  cast to StrBuf* and overwritten with the extracted value
 *
 * NOTE(review): the return statement is not visible in this excerpt;
 * presumably the function returns size * nmemb, as libcurl expects from a
 * header callback - confirm.
 */
72 size_t GetLocationString( void *ptr, size_t size, size_t nmemb, void *userdata)
74 #define LOCATION "location"
/* sizeof(LOCATION) - 1 is the length of the name without its terminating NUL. */
75 if (strncasecmp((char*)ptr, LOCATION, sizeof(LOCATION) - 1) == 0)
77 StrBuf *pURL = (StrBuf*) userdata;
78 char *pch = (char*) ptr;
/* pche starts one position past the last byte of the header data. */
81 pche = pch + (size * nmemb);
/* sizeof(LOCATION) includes the NUL, so this skips the name plus one more character. */
82 pch += sizeof(LOCATION);
/* The loop bodies are not visible here; presumably pch advances over separators - confirm. */
84 while (isspace(*pch) || (*pch == ':'))
/*
 * NOTE(review): the first evaluation reads *pche at ptr + size * nmemb,
 * i.e. beyond the bytes handed to the callback - confirm this is safe.
 * Presumably the hidden loop body steps pche backwards over trailing
 * whitespace and NUL bytes.
 */
87 while (isspace(*pche) || (*pche == '\0'))
/* Copy the inclusive range [pch, pche] into the caller's buffer. */
91 StrBufPlain(pURL, pch, pche - pch + 1);
/*
 * Result handler for a URL lookup. It receives the AsyncIO of the request
 * and, in the visible code, unconditionally returns eTerminateConnection;
 * the original TODO marks it as unfinished.
 */
97 eNextState LookupUrlResult(AsyncIO *IO)
99 return eTerminateConnection; /// /TODO
/*
 * Start an HTTP request for the shortened URL held in ShorterUrlStr.
 * GetLocationString() is installed as the header callback with ShorterUrlStr
 * as its user data, so a "location" header overwrites ShorterUrlStr.
 *
 * ShorterUrlStr  in: the URL to request; also the buffer the header
 *                callback writes the extracted value into
 *
 * NOTE(review): the return statements and the error/shutdown path are not
 * visible in this excerpt. ExpandShortUrls() treats a non-zero result as
 * "expanded" - confirm the exact values.
 */
102 int LookupUrl(StrBuf *ShorterUrlStr)
/*
 * NOTE(review): the malloc() result is passed straight to memset() without
 * a NULL check. Where IO is released is not visible here - confirm ownership.
 */
110 IO = (AsyncIO*) malloc(sizeof(AsyncIO));
111 memset(IO, 0, sizeof(AsyncIO));
112 IO->CitContext = CloneContext(CC);
/* 80 is presumably the default port used when the URL names none - confirm against ParseURL. */
114 ParseURL(&IO->ConnectMe, ShorterUrlStr, 80);
115 CurlPrepareURL(IO->ConnectMe);
116 if (! evcurl_init(IO,
119 "Citadel RSS ShorterURL Expander",
122 CtdlLogPrintf(CTDL_ALERT, "Unable to initialize libcurl.\n");
125 chnd = IO->HttpReq.chnd;
/*
 * OPT is defined outside this excerpt; presumably it sets the matching
 * CURLOPT_* option on chnd - confirm.
 * NOTE(review): both TLS verification options are set to 0.
 */
127 OPT(SSL_VERIFYPEER, 0);
128 OPT(SSL_VERIFYHOST, 0);
/*
 * NOTE(review): FOLLOWLOCATION is normally an on/off switch in libcurl; a
 * redirect limit would be MAXREDIRS - confirm the intent of the value 10.
 */
129 OPT(FOLLOWLOCATION, 10);
130 #ifdef CURLOPT_HTTP_CONTENT_DECODING
131 OPT(HTTP_CONTENT_DECODING, 1);
/* Response headers are routed through GetLocationString() with ShorterUrlStr as user data. */
134 OPT(HEADERFUNCTION , GetLocationString);
135 OPT(WRITEHEADER, ShorterUrlStr);
/* The statement guarded by this check is not visible; presumably it aborts the lookup - confirm. */
138 if (CtdlThreadCheckStop())
141 evcurl_handle_start(IO);
/*
 * Search Message for occurrences of every entry in UrlShorteners and record
 * each hit in pUrls as a freshly allocated ConstStr.
 *
 * pUrls    hash list that receives one ConstStr per hit, keyed by nHits
 * Message  text to scan; it is only read here
 *
 * NOTE(review): the lines that set pCUrl->Key and that advance nShorter,
 * nHits and pch are not visible in this excerpt. Presumably Key points at
 * the hit inside Message's buffer - confirm.
 */
151 void CrawlMessageForShorterUrls(HashList *pUrls, StrBuf *Message)
/* One pass per shortener entry, looked up by the integer key nShorter. */
160 while (GetHash(UrlShorteners, IKEY(nShorter), &pv))
163 pch = ChrPtr(Message);
164 pUrl = strstr(pch, ChrPtr((StrBuf*)pv));
/* The inner loop stops once nHits reaches 99. */
165 while ((pUrl != NULL) && (nHits < 99))
/* NOTE(review): no NULL check on the malloc() result is visible. */
167 pCUrl = malloc(sizeof(ConstStr));
/* Skip the shortener prefix, then scan over characters accepted as part of the short URL. */
170 pch = pUrl + StrLength((StrBuf*)pv);
171 while (isalnum(*pch)||(*pch == '-')||(*pch == '/'))
173 pCUrl->len = pch - pCUrl->Key;
/*
 * NULL is passed where LoadUrlShorteners() passes HFreeStrBuf; presumably
 * that argument is the destructor - confirm who frees pCUrl.
 */
175 Put(pUrls, IKEY(nHits), pCUrl, NULL);
/* Continue searching after the end of the current hit. */
177 pUrl = strstr(pch, ChrPtr((StrBuf*)pv));
/*
 * Comparison callback that ExpandShortUrls() hands to SortByPayload().
 * It orders two ConstStr items by the address stored in their Key members.
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably 0 is returned for equal keys and values of opposite sign
 * otherwise - confirm. The code also assumes that the items passed in are
 * the ConstStr objects themselves - confirm against SortByPayload().
 */
182 int SortConstStrByPosition(const void *Item1, const void *Item2)
184 const ConstStr *p1, *p2;
185 p1 = (const ConstStr*) Item1;
186 p2 = (const ConstStr*) Item2;
187 if (p1->Key == p2->Key)
189 if (p1->Key > p2->Key)
/*
 * Build a hash list of the shortened URLs found in Message by running
 * CrawlMessageForShorterUrls() on a new hash list.
 *
 * Message  text to scan; messages longer than 500 bytes get special
 *          treatment by the length check below
 *
 * NOTE(review): the statements guarded by the two if conditions and all
 * return statements are not visible in this excerpt. Presumably long
 * messages and messages without hits yield NULL, and the hash list is
 * returned otherwise - confirm.
 */
194 HashList *GetShorterUrls(StrBuf *Message)
197 /* we just suspect URL shorteners to be inside of feeds from twitter
198 * or other short content messages, so don't crawl through real blogs.
200 if (StrLength(Message) > 500)
203 pUrls = NewHash(1, Flathash);
204 CrawlMessageForShorterUrls(pUrls, Message);
206 if (GetCount(pUrls) > 0)
/*
 * Rewrite Message so that every short URL recorded in pUrls becomes an HTML
 * link. The new text is assembled in a scratch buffer (Shadow) and then
 * replaces the content of Message.
 *
 * Message   text to rewrite in place
 * pUrls     ConstStr hits; they are sorted here with SortConstStrByPosition
 * Callback  not referenced in the visible lines
 *
 * NOTE(review): the else keyword, the braces and any release of Shadow and
 * Pos are not visible in this excerpt - confirm there is no leak.
 */
213 void ExpandShortUrls(StrBuf *Message, HashList *pUrls, int Callback)
220 StrBuf *ShorterUrlStr;
226 Shadow = NewStrBufPlain(NULL, StrLength(Message));
/* Sort the hits so they can be processed in one pass over the message. */
227 SortByPayload (pUrls, SortConstStrByPosition);
229 ShorterUrlStr = NewStrBufPlain(NULL, StrLength(Message));
/* pch is the copy cursor into Message; pche marks the end of Message. */
231 pch = ChrPtr(Message);
232 pche = pch + StrLength(Message);
233 Pos = GetNewHashPos(pUrls, 1);
234 while (GetNextHashPos(pUrls, Pos, &len, &Key, &pv))
236 pCUrl = (ConstStr*) pv;
/* Copy the plain text between the previous hit and this one. */
238 if (pch != pCUrl->Key)
239 StrBufAppendBufPlain(Shadow, pch, pCUrl->Key - pch, 0);
/*
 * Load the short URL into ShorterUrlStr and try to resolve it.
 * NOTE(review): LookupUrl() hands the request to evcurl_handle_start();
 * whether ShorterUrlStr already holds the expanded URL when it returns is
 * not visible here - confirm.
 */
241 StrBufPlain(ShorterUrlStr, CKEY(*pCUrl));
242 if (LookupUrl(ShorterUrlStr))
/*
 * Lookup reported success: link to ShorterUrlStr and show the original
 * short URL in square brackets.
 * NOTE(review): the URL text is inserted into HTML without escaping.
 */
244 StrBufAppendBufPlain(Shadow, HKEY("<a href=\""), 0);
245 StrBufAppendBuf(Shadow, ShorterUrlStr, 0);
246 StrBufAppendBufPlain(Shadow, HKEY("\">"), 0);
247 StrBufAppendBuf(Shadow, ShorterUrlStr, 0);
248 StrBufAppendBufPlain(Shadow, HKEY("["), 0);
249 StrBufAppendBufPlain(Shadow, pCUrl->Key, pCUrl->len, 0);
250 StrBufAppendBufPlain(Shadow, HKEY("]</a>"), 0);
/* Presumably the else branch (keyword not visible): link the short URL to itself. */
254 StrBufAppendBufPlain(Shadow, HKEY("<a href=\""), 0);
255 StrBufAppendBufPlain(Shadow, pCUrl->Key, pCUrl->len, 0);
256 StrBufAppendBufPlain(Shadow, HKEY("\">"), 0);
257 StrBufAppendBufPlain(Shadow, pCUrl->Key, pCUrl->len, 0);
258 StrBufAppendBufPlain(Shadow, HKEY("</a>"), 0);
/*
 * NOTE(review): the + 1 moves the cursor one character past the end of the
 * hit, so the character that follows the URL is not copied to Shadow, and
 * for a URL that ends the message pch ends up beyond pche - confirm.
 */
260 pch = pCUrl->Key + pCUrl->len + 1;
/* Append whatever follows the last hit, then swap the new text into Message. */
264 StrBufAppendBufPlain(Shadow, pch, pche - pch, 0);
265 FlushStrBuf(Message);
266 StrBufAppendBuf(Message, Shadow, 0);
268 FreeStrBuf(&ShorterUrlStr);
/*
 * Create the UrlShorteners table and fill it from the file named by
 * file_citadel_urlshorteners. Every non-empty line that does not start with
 * '#' is stored under the integer key i.
 *
 * NOTE(review): the allocation of Line, the handling of a failed open(),
 * the increment of i and the closing of fd are not visible in this
 * excerpt - confirm each of them.
 */
276 void LoadUrlShorteners(void)
280 const char *POS = NULL;
281 const char *Err = NULL;
282 StrBuf *Content, *Line;
285 UrlShorteners = NewHash(0, Flathash);
/* The flags argument is the literal 0; presumably this means read-only - confirm. */
287 fd = open(file_citadel_urlshorteners, 0);
291 Content = NewStrBufPlain(NULL, SIZ);
/* Read line by line until the reader sets POS to StrBufNOTNULL. */
293 while (POS != StrBufNOTNULL)
295 StrBufTCP_read_buffered_line_fast (Line, Content, &POS, &fd, 1, 1, &Err);
/* NOTE(review): the first character of Line is examined before its length is checked. */
297 if ((*ChrPtr(Line) != '#') && (StrLength(Line) > 0))
/* HFreeStrBuf is passed along with Line; presumably the table then owns and frees it - confirm. */
299 Put(UrlShorteners, IKEY(i), Line, HFreeStrBuf);
309 FreeStrBuf(&Content);
/*
 * Cleanup hook, registered by the module initializer through
 * CtdlRegisterCleanupHook(), that deletes the UrlShorteners table.
 */
314 void shorter_url_cleanup(void)
316 DeleteHash(&UrlShorteners);
/*
 * Module entry point for "urldeshortener": logs the libcurl version string,
 * loads the shortener table, registers shorter_url_cleanup() as a cleanup
 * hook and returns the module name "UrlShortener".
 *
 * NOTE(review): any condition guarding the setup calls lies on lines that
 * are not visible in this excerpt - confirm.
 */
320 CTDL_MODULE_INIT(urldeshortener)
324 CtdlLogPrintf(CTDL_INFO, "%s\n", curl_version());
328 LoadUrlShorteners ();
329 CtdlRegisterCleanupHook(shorter_url_cleanup);
331 return "UrlShortener";