4 * Copyright (c) 1998-2012 by the citadel.org team
6 * This program is open source software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 3.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
30 #include <sys/types.h>
33 #if TIME_WITH_SYS_TIME
34 # include <sys/time.h>
38 # include <sys/time.h>
47 #include <sys/socket.h>
48 #include <netinet/in.h>
49 #include <arpa/inet.h>
52 #include <libcitadel.h>
55 #include "citserver.h"
62 #include "internet_addressing.h"
65 #include "ctdl_module.h"
66 #include "locate_host.h"
67 #include "citadel_dirs.h"
69 #include "event_client.h"
// Table of known URL-shortener strings. It is created and filled by
// LoadUrlShorteners(), scanned by CrawlMessageForShorterUrls() (each entry is
// searched for in the message text), and released by shorter_url_cleanup().
71 HashList *UrlShorteners = NULL;
// Header callback registered in LookupUrl() through OPT(HEADERFUNCTION, ...).
// userdata is the StrBuf registered there through OPT(WRITEHEADER, ...).
// When the header data starts with "location" (compared case-insensitively),
// the header's value is handed to StrBufPlain() for that StrBuf.
//   ptr          start of the header data
//   size, nmemb  the data length is size * nmemb bytes
//   userdata     StrBuf that receives the redirect target
// NOTE(review): only part of this function is visible in this excerpt; the
// declaration of pche, both loop bodies and the return statement are not shown.
73 size_t GetLocationString( void *ptr, size_t size, size_t nmemb, void *userdata)
75 #define LOCATION "location"
// sizeof(LOCATION) counts the terminating NUL, hence the - 1: only the eight
// letters of the header name are compared.
76 if (strncasecmp((char*)ptr, LOCATION, sizeof(LOCATION) - 1) == 0)
78 StrBuf *pURL = (StrBuf*) userdata;
79 char *pch = (char*) ptr;
// pche starts one byte past the last byte of the header data.
82 pche = pch + (size * nmemb);
// Skips the header name plus one further byte (sizeof includes the NUL),
// presumably the ':' separator.
83 pch += sizeof(LOCATION);
// Leading separator / whitespace in front of the value.
// NOTE(review): isspace() on a plain char is undefined for negative values;
// a cast to unsigned char would be the safe form.
85 while (isspace(*pch) || (*pch == ':'))
// Trailing whitespace / NUL bytes behind the value.
// NOTE(review): the first test reads *pche at ptr + size * nmemb, i.e. beyond
// the reported data length, and no visible condition keeps pche from dropping
// below pch -- confirm against the full function and the libcurl contract.
88 while (isspace(*pche) || (*pche == '\0'))
// Pass the bytes pch..pche (inclusive, hence the + 1) to StrBufPlain() for pURL.
92 StrBufPlain(pURL, pch, pche - pch + 1);
// Takes the AsyncIO of a lookup and returns an eNextState; the body is not
// visible in this excerpt. Presumably one of the callbacks passed to
// InitcURLIOStruct() in LookupUrl() -- that argument line is not shown here.
// NOTE(review): the name reads like a typo of "ShutdownLookupUrl"; a rename
// would have to cover every reference.
96 eNextState ShutdownLookuUrl(AsyncIO *IO)
// Takes the AsyncIO of a lookup and returns an eNextState; the body is not
// visible in this excerpt. Presumably one of the callbacks passed to
// InitcURLIOStruct() in LookupUrl() -- that argument line is not shown here.
101 eNextState TerminateLookupUrl(AsyncIO *IO)
// Callback passed to InitcURLIOStruct() in LookupUrl(). Takes the AsyncIO of
// the lookup and returns an eNextState; the body is not visible in this excerpt.
106 eNextState TerminateLookupUrlDB(AsyncIO *IO)
// Takes the AsyncIO of a lookup; the only visible statement returns
// eTerminateConnection without inspecting IO. The trailing TODO marks the
// result handling as unfinished.
111 eNextState LookupUrlResult(AsyncIO *IO)
113 return eTerminateConnection; /// /TODO
// Sets up an HTTP request for the short URL held in ShorterUrlStr and queues
// it; GetLocationString() is registered so that a "location" header seen
// during the request is written back into ShorterUrlStr.
//   ShorterUrlStr  in: the short URL; also registered as the header-callback
//                  target, so it may be overwritten with the redirect target
//   returns        an int that ExpandShortUrls() treats as "expanded" when
//                  non-zero; the return statements are not visible here
// NOTE(review): several lines of this function (local declarations, the
// remaining InitcURLIOStruct() arguments, error paths, returns) are not
// visible in this excerpt.
116 int LookupUrl(StrBuf *ShorterUrlStr)
// NOTE(review): the malloc() result is used by memset() without a NULL check.
124 IO = (AsyncIO*) malloc(sizeof(AsyncIO));
125 memset(IO, 0, sizeof(AsyncIO));
126 IO->CitContext = CloneContext(CC);
// 80 is presumably the default port used when the URL names none -- confirm
// against ParseURL().
128 ParseURL(&IO->ConnectMe, ShorterUrlStr, 80);
129 CurlPrepareURL(IO->ConnectMe);
130 if (! InitcURLIOStruct(IO,
133 "Citadel RSS ShorterURL Expander",
136 TerminateLookupUrlDB,
139 syslog(LOG_ALERT, "Unable to initialize libcurl.\n");
// OPT() is defined outside this excerpt; presumably it applies a CURLOPT_*
// setting to chnd.
142 chnd = IO->HttpReq.chnd;
// NOTE(review): both TLS certificate checks are switched off for this request.
144 OPT(SSL_VERIFYPEER, 0);
145 OPT(SSL_VERIFYHOST, 0);
// NOTE(review): libcurl documents CURLOPT_FOLLOWLOCATION as an on/off switch;
// a redirect limit would be CURLOPT_MAXREDIRS. Confirm what 10 is meant to do.
146 OPT(FOLLOWLOCATION, 10);
147 #ifdef CURLOPT_HTTP_CONTENT_DECODING
148 OPT(HTTP_CONTENT_DECODING, 1);
// Route response headers through GetLocationString() with ShorterUrlStr as
// its userdata.
151 OPT(HEADERFUNCTION , GetLocationString);
152 OPT(WRITEHEADER, ShorterUrlStr);
155 if (server_shutting_down)
// NOTE(review): if QueueCurlContext() only enqueues the request, the header
// callback may run after the caller has already read (and later freed)
// ShorterUrlStr -- confirm the lifetime expectations.
158 QueueCurlContext(IO);
// Scans Message for every entry of UrlShorteners and records each occurrence
// in pUrls as a heap-allocated ConstStr, keyed by a running hit counter.
//   pUrls    hash that receives one ConstStr per hit
//   Message  text to scan; the recorded lengths are computed from pointers
//            into its buffer
// NOTE(review): parts of this function (declarations, counter updates, the
// assignment of pCUrl->Key, loop bodies) are not visible in this excerpt.
168 void CrawlMessageForShorterUrls(HashList *pUrls, StrBuf *Message)
// One pass per configured shortener; pv is used as a StrBuf below.
177 while (GetHash(UrlShorteners, IKEY(nShorter), &pv))
180 pch = ChrPtr(Message);
181 pUrl = strstr(pch, ChrPtr((StrBuf*)pv));
// Stop after 99 hits in total.
182 while ((pUrl != NULL) && (nHits < 99))
// NOTE(review): the malloc() result is not checked in the visible lines.
184 pCUrl = malloc(sizeof(ConstStr));
// Start just behind the shortener string, then extend over the characters
// accepted as part of the short URL (alphanumerics, '-' and '/').
187 pch = pUrl + StrLength((StrBuf*)pv);
// NOTE(review): isalnum() on a plain char is undefined for negative values.
188 while (isalnum(*pch)||(*pch == '-')||(*pch == '/'))
// Length from pCUrl->Key (presumably set to pUrl on a line not shown) up to
// the first byte that is not part of the URL.
190 pCUrl->len = pch - pCUrl->Key;
// NOTE(review): a NULL destructor is passed -- confirm who frees pCUrl.
192 Put(pUrls, IKEY(nHits), pCUrl, NULL);
// Continue searching behind the URL just recorded.
194 pUrl = strstr(pch, ChrPtr((StrBuf*)pv));
// Comparator handed to SortByPayload() in ExpandShortUrls(): orders two
// ConstStr items by the address stored in Key, i.e. by where each URL sits in
// the message buffer.
// NOTE(review): the return statements are not visible in this excerpt; also
// confirm that SortByPayload() passes the payload pointers directly, since
// Item1/Item2 are cast to ConstStr without any unwrapping.
199 int SortConstStrByPosition(const void *Item1, const void *Item2)
201 const ConstStr *p1, *p2;
202 p1 = (const ConstStr*) Item1;
203 p2 = (const ConstStr*) Item2;
// Same position.
204 if (p1->Key == p2->Key)
// p1 lies behind p2 in the buffer.
206 if (p1->Key > p2->Key)
// Collects the short URLs found in Message into a newly created hash
// (NewHash(1, Flathash)) by calling CrawlMessageForShorterUrls().
// Messages longer than 500 bytes take a separate early branch and the result
// is tested with GetCount(pUrls) > 0; what either branch returns is not
// visible in this excerpt.
211 HashList *GetShorterUrls(StrBuf *Message)
214 /* we just suspect URL shorteners to be inside of feeds from twitter
215 * or other short content messages, so don't crawl through real blogs.
217 if (StrLength(Message) > 500)
220 pUrls = NewHash(1, Flathash);
221 CrawlMessageForShorterUrls(pUrls, Message);
223 if (GetCount(pUrls) > 0)
// Rebuilds Message with every short URL recorded in pUrls wrapped in an HTML
// anchor. The new text is assembled in a scratch buffer (Shadow) and then
// copied back into Message.
//   Message   text to rewrite in place
//   pUrls     ConstStr entries pointing into Message's buffer, as produced by
//             CrawlMessageForShorterUrls()
//   Callback  not referenced in the lines visible here
// NOTE(review): declarations, braces and any cleanup of Shadow / Pos are not
// visible in this excerpt.
230 void ExpandShortUrls(StrBuf *Message, HashList *pUrls, int Callback)
237 StrBuf *ShorterUrlStr;
243 Shadow = NewStrBufPlain(NULL, StrLength(Message));
// Visit the URLs in the order given by SortConstStrByPosition().
244 SortByPayload (pUrls, SortConstStrByPosition);
// Scratch buffer for the URL currently being looked up.
246 ShorterUrlStr = NewStrBufPlain(NULL, StrLength(Message));
// pch: next byte of Message not yet copied; pche: end of Message.
248 pch = ChrPtr(Message);
249 pche = pch + StrLength(Message);
250 Pos = GetNewHashPos(pUrls, 1);
251 while (GetNextHashPos(pUrls, Pos, &len, &Key, &pv))
253 pCUrl = (ConstStr*) pv;
// Copy the plain text between the previous URL and this one.
255 if (pch != pCUrl->Key)
256 StrBufAppendBufPlain(Shadow, pch, pCUrl->Key - pch, 0);
258 StrBufPlain(ShorterUrlStr, CKEY(*pCUrl));
// Non-zero result: link to whatever ShorterUrlStr holds after the lookup,
// show it as the link text and append the original short URL in brackets.
// NOTE(review): ShorterUrlStr comes from a remote "location" header and is
// inserted into the markup without escaping -- confirm this is acceptable.
259 if (LookupUrl(ShorterUrlStr))
261 StrBufAppendBufPlain(Shadow, HKEY("<a href=\""), 0);
262 StrBufAppendBuf(Shadow, ShorterUrlStr, 0);
263 StrBufAppendBufPlain(Shadow, HKEY("\">"), 0);
264 StrBufAppendBuf(Shadow, ShorterUrlStr, 0);
265 StrBufAppendBufPlain(Shadow, HKEY("["), 0);
266 StrBufAppendBufPlain(Shadow, pCUrl->Key, pCUrl->len, 0);
267 StrBufAppendBufPlain(Shadow, HKEY("]</a>"), 0);
// Presumably the else branch (its keyword is not visible here): link the
// short URL to itself.
271 StrBufAppendBufPlain(Shadow, HKEY("<a href=\""), 0);
272 StrBufAppendBufPlain(Shadow, pCUrl->Key, pCUrl->len, 0);
273 StrBufAppendBufPlain(Shadow, HKEY("\">"), 0);
274 StrBufAppendBufPlain(Shadow, pCUrl->Key, pCUrl->len, 0);
275 StrBufAppendBufPlain(Shadow, HKEY("</a>"), 0);
// NOTE(review): Key + len is already the first byte behind the URL, so the
// + 1 skips that byte (it is never copied to Shadow); for a URL that ends the
// message pch ends up beyond pche -- confirm whether the + 1 is intended.
277 pch = pCUrl->Key + pCUrl->len + 1;
// Copy the text behind the last URL.
// NOTE(review): pche - pch is negative when pch has moved past pche (see the
// note on the + 1 above).
281 StrBufAppendBufPlain(Shadow, pch, pche - pch, 0);
// Replace the message text with the rebuilt version.
282 FlushStrBuf(Message);
283 StrBufAppendBuf(Message, Shadow, 0);
285 FreeStrBuf(&ShorterUrlStr);
// Creates the UrlShorteners table and fills it from the file named by
// file_citadel_urlshorteners, one entry per line; empty lines and lines that
// start with '#' are not stored.
// NOTE(review): the allocation of Line, the handling of a failed open(), the
// counter i and the closing of fd are not visible in this excerpt.
293 void LoadUrlShorteners(void)
297 const char *POS = NULL;
298 const char *Err = NULL;
299 StrBuf *Content, *Line;
302 UrlShorteners = NewHash(0, Flathash);
// Flags value 0 -- presumably O_RDONLY; confirm.
304 fd = open(file_citadel_urlshorteners, 0);
// Read buffer used by the buffered line reader below.
308 Content = NewStrBufPlain(NULL, SIZ);
// Keep reading lines until the reader sets POS to StrBufNOTNULL.
310 while (POS != StrBufNOTNULL)
312 StrBufTCP_read_buffered_line_fast (Line, Content, &POS, &fd, 1, 1, &Err);
// NOTE(review): *ChrPtr(Line) is evaluated before the length check; confirm
// ChrPtr() never returns NULL for an empty Line.
314 if ((*ChrPtr(Line) != '#') && (StrLength(Line) > 0))
// The table is given HFreeStrBuf as the destructor for the stored Line.
316 Put(UrlShorteners, IKEY(i), Line, HFreeStrBuf);
326 FreeStrBuf(&Content);
// Cleanup hook registered by the module initializer through
// CtdlRegisterCleanupHook(); deletes the UrlShorteners table.
331 void shorter_url_cleanup(void)
333 DeleteHash(&UrlShorteners);
// Module entry point: logs the libcurl version string, loads the shortener
// table, registers shorter_url_cleanup() as cleanup hook and returns the
// module name "UrlShortener".
// NOTE(review): lines between the visible statements are elided in this
// excerpt, so any condition guarding them cannot be seen here.
337 CTDL_MODULE_INIT(urldeshortener)
341 syslog(LOG_INFO, "%s\n", curl_version());
345 LoadUrlShorteners ();
346 CtdlRegisterCleanupHook(shorter_url_cleanup);
348 return "UrlShortener";