SMTP-Client: move the client-shutdown procedure into the DB-Thread; we mustn't block...
[citadel.git] / citadel / modules / urldeshortener / serv_expand_shorter_urls.c
1
2 /*
3  *
4  * Copyright (c) 1998-2012 by the citadel.org team
5  *
6  *  This program is open source software; you can redistribute it and/or modify
7  *  it under the terms of the GNU General Public License version 3.
8  *  
9  *  
10  *
11  *  This program is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  *  GNU General Public License for more details.
15  *
16  *  
17  *  
18  *  
19  */
20
21 #include "sysdep.h"
22 #include <stdlib.h>
23 #include <unistd.h>
24 #include <stdio.h>
25 #include <termios.h>
26 #include <fcntl.h>
27 #include <signal.h>
28 #include <pwd.h>
29 #include <errno.h>
30 #include <sys/types.h>
31 #include <syslog.h>
32
33 #if TIME_WITH_SYS_TIME
34 # include <sys/time.h>
35 # include <time.h>
36 #else
37 # if HAVE_SYS_TIME_H
38 #  include <sys/time.h>
39 # else
40 #  include <time.h>
41 # endif
42 #endif
43 #include <sys/wait.h>
44 #include <ctype.h>
45 #include <string.h>
46 #include <limits.h>
47 #include <sys/socket.h>
48 #include <netinet/in.h>
49 #include <arpa/inet.h>
50 #include <assert.h>
51
52 #include <libcitadel.h>
53 #include "citadel.h"
54 #include "server.h"
55 #include "citserver.h"
56 #include "support.h"
57 #include "config.h"
58 #include "control.h"
59 #include "user_ops.h"
60 #include "database.h"
61 #include "msgbase.h"
62 #include "internet_addressing.h"
63 #include "genstamp.h"
64 #include "domain.h"
65 #include "ctdl_module.h"
66 #include "locate_host.h"
67 #include "citadel_dirs.h"
68
69 #include "event_client.h"
70
/*
 * List of known URL-shortener prefixes.  Filled by LoadUrlShorteners() with
 * one StrBuf per configured line (keyed by a running integer), searched by
 * CrawlMessageForShorterUrls() and released by shorter_url_cleanup().
 */
HashList *UrlShorteners = NULL;
72
73 size_t GetLocationString( void *ptr, size_t size, size_t nmemb, void *userdata)
74 {
75 #define LOCATION "location"
76         if (strncasecmp((char*)ptr, LOCATION, sizeof(LOCATION) - 1) == 0)
77         {
78                 StrBuf *pURL = (StrBuf*) userdata;
79                 char *pch = (char*) ptr;
80                 char *pche;
81                 
82                 pche = pch + (size * nmemb);
83                 pch += sizeof(LOCATION);
84                 
85                 while (isspace(*pch) || (*pch == ':'))
86                         pch ++;
87
88                 while (isspace(*pche) || (*pche == '\0'))
89                         pche--;
90                 
91                 FlushStrBuf(pURL);
92                 StrBufPlain(pURL, pch, pche - pch + 1); 
93         }
94         return size * nmemb;
95 }
96 eNextState ShutdownLookuUrl(AsyncIO *IO)
97 {
98 //TOOD
99         return eAbort;
100 }
101 eNextState TerminateLookupUrl(AsyncIO *IO)
102 {
103 //TOOD
104         return eAbort;
105 }
106 eNextState TerminateLookupUrlDB(AsyncIO *IO)
107 {
108 //TOOD
109         return eAbort;
110 }
111 eNextState LookupUrlResult(AsyncIO *IO)
112 {
113         return eTerminateConnection; /// /TODO
114 }
115
116 int LookupUrl(StrBuf *ShorterUrlStr)
117 {
118         CURLcode sta;
119         int rc = 0;
120         CURL *chnd;
121         AsyncIO *IO;
122
123
124         IO = (AsyncIO*) malloc(sizeof(AsyncIO));
125         memset(IO, 0, sizeof(AsyncIO));
126         IO->CitContext = CloneContext(CC);
127
128         ParseURL(&IO->ConnectMe, ShorterUrlStr, 80);
129         CurlPrepareURL(IO->ConnectMe);
130         if (! InitcURLIOStruct(IO, 
131 //                        Ctx, 
132                           NULL,
133                           "Citadel RSS ShorterURL Expander",
134                           LookupUrlResult, 
135                           TerminateLookupUrl, 
136                           TerminateLookupUrlDB, 
137                           ShutdownLookuUrl))
138         {
139                 syslog(LOG_ALERT, "Unable to initialize libcurl.\n");
140                 goto shutdown;
141         }
142         chnd = IO->HttpReq.chnd;
143
144         OPT(SSL_VERIFYPEER, 0);
145         OPT(SSL_VERIFYHOST, 0);
146         OPT(FOLLOWLOCATION, 10);
147 #ifdef CURLOPT_HTTP_CONTENT_DECODING
148         OPT(HTTP_CONTENT_DECODING, 1);
149         OPT(ENCODING, "");
150 #endif 
151         OPT(HEADERFUNCTION , GetLocationString);
152         OPT(WRITEHEADER, ShorterUrlStr);
153
154
155         if (server_shutting_down)
156                 goto shutdown ;
157
158         QueueCurlContext(IO);
159
160 shutdown:
161
162         return rc;
163
164 }
165
166
167
168 void CrawlMessageForShorterUrls(HashList *pUrls, StrBuf *Message)
169 {
170         int nHits = 0;
171         void *pv;
172         int nShorter = 0;
173         const char *pch;
174         const char *pUrl;
175         ConstStr *pCUrl;
176
177         while (GetHash(UrlShorteners, IKEY(nShorter), &pv))
178         {
179                 nShorter++;
180                 pch = ChrPtr(Message);
181                 pUrl = strstr(pch, ChrPtr((StrBuf*)pv));
182                 while ((pUrl != NULL) && (nHits < 99))
183                 {
184                         pCUrl = malloc(sizeof(ConstStr));
185
186                         pCUrl->Key = pUrl;
187                         pch = pUrl + StrLength((StrBuf*)pv);
188                         while (isalnum(*pch)||(*pch == '-')||(*pch == '/'))
189                                 pch++;
190                         pCUrl->len = pch - pCUrl->Key;
191
192                         Put(pUrls, IKEY(nHits), pCUrl, NULL);
193                         nHits ++;
194                         pUrl = strstr(pch, ChrPtr((StrBuf*)pv));
195                 }
196         }
197 }
198
199 int SortConstStrByPosition(const void *Item1, const void *Item2)
200 {
201         const ConstStr *p1, *p2;
202         p1 = (const ConstStr*) Item1;
203         p2 = (const ConstStr*) Item2;
204         if (p1->Key == p2->Key)
205                 return 0;
206         if (p1->Key > p2->Key)
207                 return 1;
208         return -1;
209 }
210
211 HashList *GetShorterUrls(StrBuf *Message)
212 {
213         HashList *pUrls;
214         /* we just suspect URL shorteners to be inside of feeds from twitter
215          * or other short content messages, so don't crawl through real blogs.
216          */
217         if (StrLength(Message) > 500)
218                 return NULL;
219
220         pUrls = NewHash(1, Flathash);
221         CrawlMessageForShorterUrls(pUrls, Message);
222
223         if (GetCount(pUrls) > 0)
224                 return pUrls;
225         else 
226                 return NULL;
227
228 }
229
230 void ExpandShortUrls(StrBuf *Message, HashList *pUrls, int Callback)
231 {
232         StrBuf *Shadow;
233         ConstStr *pCUrl;
234         const char *pch;
235         const char *pche;
236
237         StrBuf *ShorterUrlStr;
238         HashPos *Pos;
239         const char *Key;
240         void *pv;
241         long len;
242         
243         Shadow = NewStrBufPlain(NULL, StrLength(Message));
244         SortByPayload (pUrls, SortConstStrByPosition);
245                 
246         ShorterUrlStr = NewStrBufPlain(NULL, StrLength(Message));
247                 
248         pch = ChrPtr(Message);
249         pche = pch + StrLength(Message);
250         Pos = GetNewHashPos(pUrls, 1);
251         while (GetNextHashPos(pUrls, Pos, &len, &Key, &pv))
252         {
253                 pCUrl = (ConstStr*) pv;
254
255                 if (pch != pCUrl->Key)
256                         StrBufAppendBufPlain(Shadow, pch, pCUrl->Key - pch, 0);
257                         
258                 StrBufPlain(ShorterUrlStr, CKEY(*pCUrl));
259                 if (LookupUrl(ShorterUrlStr))
260                 {
261                         StrBufAppendBufPlain(Shadow, HKEY("<a href=\""), 0);
262                         StrBufAppendBuf(Shadow, ShorterUrlStr, 0);
263                         StrBufAppendBufPlain(Shadow, HKEY("\">"), 0);
264                         StrBufAppendBuf(Shadow, ShorterUrlStr, 0);
265                         StrBufAppendBufPlain(Shadow, HKEY("["), 0);
266                         StrBufAppendBufPlain(Shadow, pCUrl->Key, pCUrl->len, 0);
267                         StrBufAppendBufPlain(Shadow, HKEY("]</a>"), 0);
268                 }
269                 else
270                 {
271                         StrBufAppendBufPlain(Shadow, HKEY("<a href=\""), 0);
272                         StrBufAppendBufPlain(Shadow, pCUrl->Key, pCUrl->len, 0);
273                         StrBufAppendBufPlain(Shadow, HKEY("\">"), 0);
274                         StrBufAppendBufPlain(Shadow, pCUrl->Key, pCUrl->len, 0);
275                         StrBufAppendBufPlain(Shadow, HKEY("</a>"), 0);
276                 }
277                 pch = pCUrl->Key + pCUrl->len + 1;
278
279         }
280         if (pch < pche)
281                 StrBufAppendBufPlain(Shadow, pch, pche - pch, 0);
282         FlushStrBuf(Message);
283         StrBufAppendBuf(Message, Shadow, 0);
284
285         FreeStrBuf(&ShorterUrlStr);
286         FreeStrBuf(&Shadow);
287         DeleteHashPos(&Pos);
288         
289
290         DeleteHash(&pUrls);
291 }
292
293 void LoadUrlShorteners(void)
294 {
295         int i = 0;
296         int fd;
297         const char *POS = NULL;
298         const char *Err = NULL;
299         StrBuf *Content, *Line;
300
301
302         UrlShorteners = NewHash(0, Flathash);
303
304         fd = open(file_citadel_urlshorteners, 0);
305
306         if (fd != 0)
307         {
308                 Content = NewStrBufPlain(NULL, SIZ);
309                 Line = NewStrBuf();
310                 while (POS != StrBufNOTNULL)
311                 {
312                         StrBufTCP_read_buffered_line_fast (Line, Content, &POS, &fd, 1, 1, &Err);
313                         StrBufTrim(Line);
314                         if ((*ChrPtr(Line) != '#') && (StrLength(Line) > 0))
315                         {
316                                 Put(UrlShorteners, IKEY(i), Line, HFreeStrBuf);
317                                 i++;
318                                 Line = NewStrBuf();
319                         }
320                         else
321                                 FlushStrBuf(Line);
322                         if (POS == NULL)
323                                 POS = StrBufNOTNULL;
324                 }
325                 FreeStrBuf(&Line);
326                 FreeStrBuf(&Content);
327         }
328         close(fd);
329 }
330
/*
 * Cleanup hook (registered in the module init function): destroys the
 * UrlShorteners hash built by LoadUrlShorteners().
 */
void shorter_url_cleanup(void)
{
	DeleteHash(&UrlShorteners);
}
335
336
337 CTDL_MODULE_INIT(urldeshortener)
338 {
339         if (threading)
340         {
341                 syslog(LOG_INFO, "%s\n", curl_version());
342         }
343         else 
344         {
345                 LoadUrlShorteners ();
346                 CtdlRegisterCleanupHook(shorter_url_cleanup);
347         }
348         return "UrlShortener";
349 }