From ae60aa1708e79de5ad979c427d06f431ee1f6d0d Mon Sep 17 00:00:00 2001 From: Wilfried Goesgens Date: Sun, 10 Oct 2010 23:11:11 +0200 Subject: [PATCH] * load the url shortener links from a config file --- citadel/Makefile.in | 3 +- citadel/include/citadel_dirs.h | 1 + citadel/modules/rssclient/serv_rssclient.c | 214 ++++++++++++--------- citadel/utillib/citadel_dirs.c | 6 + 4 files changed, 130 insertions(+), 94 deletions(-) diff --git a/citadel/Makefile.in b/citadel/Makefile.in index 304596f70..f1c873674 100644 --- a/citadel/Makefile.in +++ b/citadel/Makefile.in @@ -259,7 +259,7 @@ install-data: network/systems; do \ $(srcdir)/mkinstalldirs $(DESTDIR)$(prefix)/$$i; \ done - @for i in funambol_newmail_soap.xml notify_about_newmail.js citadel.rc public_clients \ + @for i in funambol_newmail_soap.xml notify_about_newmail.js citadel.rc public_clients citadel_urlshorteners.rc \ `find $(srcdir)/help $(srcdir)/messages $(srcdir)/network -type f | grep -v .svn`; do \ echo $(INSTALL_DATA) $$i $(DESTDIR)$(prefix)/$$i; \ $(INSTALL_DATA) $$i $(DESTDIR)$(prefix)/$$i; \ @@ -275,6 +275,7 @@ install-data-new: done $(srcdir)/mkinstalldirs $(DESTDIR)$(ETC_DIR)/ $(INSTALL_DATA) $(srcdir)/public_clients $(DESTDIR)$(ETC_DIR)/public_clients + $(INSTALL_DATA) $(srcdir)/citadel_urlshorteners.rc $(DESTDIR)$(ETC_DIR)/citadel_urlshorteners.rc $(INSTALL_DATA) $(srcdir)/citadel.rc $(DESTDIR)$(ETC_DIR)/citadel.rc $(INSTALL_DATA) $(srcdir)/network/mail.aliases $(DESTDIR)$(ETC_DIR)/mail.aliases$ diff --git a/citadel/include/citadel_dirs.h b/citadel/include/citadel_dirs.h index 05ce0fc37..147bf0276 100644 --- a/citadel/include/citadel_dirs.h +++ b/citadel/include/citadel_dirs.h @@ -39,6 +39,7 @@ extern char ctdl_utilbin_dir[PATH_MAX]; extern char file_citadel_control[PATH_MAX]; extern char file_citadel_rc[PATH_MAX]; extern char file_citadel_config[PATH_MAX]; +extern char file_citadel_urlshorteners[PATH_MAX]; extern char file_lmtp_socket[PATH_MAX]; extern char file_lmtp_unfiltered_socket[PATH_MAX]; extern char file_arcq[PATH_MAX]; diff --git a/citadel/modules/rssclient/serv_rssclient.c b/citadel/modules/rssclient/serv_rssclient.c index 6e96ec822..341e8b025 100644 --- a/citadel/modules/rssclient/serv_rssclient.c +++ b/citadel/modules/rssclient/serv_rssclient.c @@ -57,18 +57,6 @@ #include "context.h" - -const char *LinkShortenerServices[] = { -"http://bit.ly/", -"http://krz.ch/", -"http://flic.kr/", -"http://sns.ly/", -"http://wp.me/", -"http://ow.ly/", -"http://tinyurl.com/", -NULL -}; - typedef struct rssnetcfg rssnetcfg; struct rssnetcfg { rssnetcfg *next; @@ -124,9 +112,10 @@ typedef struct _rsscollection { } rsscollection; struct rssnetcfg *rnclist = NULL; -HashList *StartHandlers; -HashList *EndHandlers; -HashList *KnownNameSpaces; +HashList *StartHandlers = NULL; +HashList *EndHandlers = NULL; +HashList *KnownNameSpaces = NULL; +HashList *UrlShorteners = NULL; void AddRSSStartHandler(rss_handler_func Handler, int Flags, const char *key, long len) { rss_xml_handler *h; @@ -407,32 +396,31 @@ shutdown: void CrawlMessageForShorterUrls(HashList *pUrls, StrBuf *Message) { int nHits = 0; - const char *pShortenerService; + void *pv; int nShorter = 0; const char *pch; const char *pUrl; ConstStr *pCUrl; - pShortenerService = LinkShortenerServices[nShorter++]; - while (pShortenerService != NULL) + while (GetHash(UrlShorteners, IKEY(nShorter), &pv)) { + nShorter++; pch = ChrPtr(Message); - pUrl = strstr(pch, pShortenerService); + pUrl = strstr(pch, ChrPtr((StrBuf*)pv)); while ((pUrl != NULL) && (nHits < 99)) { pCUrl = malloc(sizeof(ConstStr)); pCUrl->Key = pUrl; - pch = pUrl + strlen(pShortenerService); + pch = pUrl + StrLength((StrBuf*)pv); while (isalnum(*pch)||(*pch == '-')||(*pch == '/')) pch++; pCUrl->len = pch - pCUrl->Key; Put(pUrls, IKEY(nHits), pCUrl, NULL); nHits ++; - pUrl = strstr(pch, pShortenerService); + pUrl = strstr(pch, ChrPtr((StrBuf*)pv)); } - pShortenerService = LinkShortenerServices[nShorter++]; } } @@ -625,7 +613,6 @@ void rss_save_item(rss_item *ri) char *From; StrBuf *Encoded = NULL; int FromAt; - int FromLen; From = html_to_ascii(ChrPtr(ri->author_or_creator), StrLength(ri->author_or_creator), @@ -1504,6 +1491,43 @@ void rssclient_scan(void) { return; } +void LoadUrlShorteners(void) +{ + int i = 0; + int fd; + const char *POS = NULL; + const char *Err = NULL; + StrBuf *Content, *Line; + + + UrlShorteners = NewHash(0, Flathash); + + fd = open(file_citadel_urlshorteners, 0); + + if (fd != 0) + { + Content = NewStrBufPlain(NULL, SIZ); + Line = NewStrBuf(); + while (POS != StrBufNOTNULL) + { + StrBufTCP_read_buffered_line_fast (Line, Content, &POS, &fd, 1, 1, &Err); + StrBufTrim(Line); + if ((*ChrPtr(Line) != '#') && (StrLength(Line) > 0)) + { + Put(UrlShorteners, IKEY(i), Line, HFreeStrBuf); + i++; + Line = NewStrBuf(); + } + else + FlushStrBuf(Line); + if (POS == NULL) + POS = StrBufNOTNULL; + } + FreeStrBuf(&Line); + FreeStrBuf(&Content); + } + close(fd); +} CTDL_MODULE_INIT(rssclient) { @@ -1512,88 +1536,92 @@ CTDL_MODULE_INIT(rssclient) CtdlLogPrintf(CTDL_INFO, "%s\n", curl_version()); CtdlRegisterSessionHook(rssclient_scan, EVT_TIMER); } - - StartHandlers = NewHash(1, NULL); - EndHandlers = NewHash(1, NULL); - - AddRSSStartHandler(RSS_item_rss_start, RSS_UNSET, HKEY("rss")); - AddRSSStartHandler(RSS_item_rdf_start, RSS_UNSET, HKEY("rdf")); - AddRSSStartHandler(ATOM_item_feed_start, RSS_UNSET, HKEY("feed")); - AddRSSStartHandler(RSS_item_item_start, RSS_RSS, HKEY("item")); - AddRSSStartHandler(ATOM_item_entry_start, RSS_ATOM, HKEY("entry")); - AddRSSStartHandler(ATOM_item_link_start, RSS_ATOM, HKEY("link")); - - AddRSSEndHandler(ATOMRSS_item_title_end, RSS_ATOM|RSS_RSS|RSS_REQUIRE_BUF, HKEY("title")); - AddRSSEndHandler(RSS_item_guid_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("guid")); - AddRSSEndHandler(ATOM_item_id_end, RSS_ATOM|RSS_REQUIRE_BUF, HKEY("id")); - AddRSSEndHandler(RSS_item_link_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("link")); + else + { + LoadUrlShorteners (); + + StartHandlers = NewHash(1, NULL); + EndHandlers = NewHash(1, NULL); + + AddRSSStartHandler(RSS_item_rss_start, RSS_UNSET, HKEY("rss")); + AddRSSStartHandler(RSS_item_rdf_start, RSS_UNSET, HKEY("rdf")); + AddRSSStartHandler(ATOM_item_feed_start, RSS_UNSET, HKEY("feed")); + AddRSSStartHandler(RSS_item_item_start, RSS_RSS, HKEY("item")); + AddRSSStartHandler(ATOM_item_entry_start, RSS_ATOM, HKEY("entry")); + AddRSSStartHandler(ATOM_item_link_start, RSS_ATOM, HKEY("link")); + + AddRSSEndHandler(ATOMRSS_item_title_end, RSS_ATOM|RSS_RSS|RSS_REQUIRE_BUF, HKEY("title")); + AddRSSEndHandler(RSS_item_guid_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("guid")); + AddRSSEndHandler(ATOM_item_id_end, RSS_ATOM|RSS_REQUIRE_BUF, HKEY("id")); + AddRSSEndHandler(RSS_item_link_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("link")); #if 0 // hm, rss to the comments of that blog, might be interesting in future, but... - AddRSSEndHandler(RSS_item_relink_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("commentrss")); + AddRSSEndHandler(RSS_item_relink_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("commentrss")); // comment count... - AddRSSEndHandler(RSS_item_relink_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("comments")); + AddRSSEndHandler(RSS_item_relink_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("comments")); #endif - AddRSSEndHandler(RSSATOM_item_title_end, RSS_ATOM|RSS_RSS|RSS_REQUIRE_BUF, HKEY("title")); - AddRSSEndHandler(ATOM_item_content_end, RSS_ATOM|RSS_REQUIRE_BUF, HKEY("content")); - AddRSSEndHandler(RSS_item_description_end, RSS_RSS|RSS_ATOM|RSS_REQUIRE_BUF, HKEY("encoded")); - AddRSSEndHandler(ATOM_item_summary_end, RSS_ATOM|RSS_REQUIRE_BUF, HKEY("summary")); - AddRSSEndHandler(RSS_item_description_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("description")); - AddRSSEndHandler(ATOM_item_published_end, RSS_ATOM|RSS_REQUIRE_BUF, HKEY("published")); - AddRSSEndHandler(ATOM_item_updated_end, RSS_ATOM|RSS_REQUIRE_BUF, HKEY("updated")); - AddRSSEndHandler(RSS_item_pubdate_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("pubdate")); - AddRSSEndHandler(RSS_item_date_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("date")); - AddRSSEndHandler(RSS_item_author_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("author")); - AddRSSEndHandler(RSS_item_creator_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("creator")); + AddRSSEndHandler(RSSATOM_item_title_end, RSS_ATOM|RSS_RSS|RSS_REQUIRE_BUF, HKEY("title")); + AddRSSEndHandler(ATOM_item_content_end, RSS_ATOM|RSS_REQUIRE_BUF, HKEY("content")); + AddRSSEndHandler(RSS_item_description_end, RSS_RSS|RSS_ATOM|RSS_REQUIRE_BUF, HKEY("encoded")); + AddRSSEndHandler(ATOM_item_summary_end, RSS_ATOM|RSS_REQUIRE_BUF, HKEY("summary")); + AddRSSEndHandler(RSS_item_description_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("description")); + AddRSSEndHandler(ATOM_item_published_end, RSS_ATOM|RSS_REQUIRE_BUF, HKEY("published")); + AddRSSEndHandler(ATOM_item_updated_end, RSS_ATOM|RSS_REQUIRE_BUF, HKEY("updated")); + AddRSSEndHandler(RSS_item_pubdate_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("pubdate")); + AddRSSEndHandler(RSS_item_date_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("date")); + AddRSSEndHandler(RSS_item_author_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("author")); + AddRSSEndHandler(RSS_item_creator_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("creator")); /* */ - AddRSSEndHandler(ATOM_item_email_end, RSS_ATOM|RSS_REQUIRE_BUF, HKEY("email")); - AddRSSEndHandler(ATOM_item_name_end, RSS_ATOM|RSS_REQUIRE_BUF, HKEY("name")); - AddRSSEndHandler(ATOM_item_uri_end, RSS_ATOM|RSS_REQUIRE_BUF, HKEY("uri")); + AddRSSEndHandler(ATOM_item_email_end, RSS_ATOM|RSS_REQUIRE_BUF, HKEY("email")); + AddRSSEndHandler(ATOM_item_name_end, RSS_ATOM|RSS_REQUIRE_BUF, HKEY("name")); + AddRSSEndHandler(ATOM_item_uri_end, RSS_ATOM|RSS_REQUIRE_BUF, HKEY("uri")); /* */ - AddRSSEndHandler(RSS_item_item_end, RSS_RSS, HKEY("item")); - AddRSSEndHandler(RSS_item_rss_end, RSS_RSS, HKEY("rss")); - AddRSSEndHandler(RSS_item_rdf_end, RSS_RSS, HKEY("rdf")); - AddRSSEndHandler(ATOM_item_entry_end, RSS_ATOM, HKEY("entry")); + AddRSSEndHandler(RSS_item_item_end, RSS_RSS, HKEY("item")); + AddRSSEndHandler(RSS_item_rss_end, RSS_RSS, HKEY("rss")); + AddRSSEndHandler(RSS_item_rdf_end, RSS_RSS, HKEY("rdf")); + AddRSSEndHandler(ATOM_item_entry_end, RSS_ATOM, HKEY("entry")); /* at the start of atoms:
  • link to resource
  • ignore them. */ - AddRSSStartHandler(RSSATOM_item_ignore, RSS_RSS|RSS_ATOM, HKEY("seq")); - AddRSSEndHandler (RSSATOM_item_ignore, RSS_RSS|RSS_ATOM, HKEY("seq")); - AddRSSStartHandler(RSSATOM_item_ignore, RSS_RSS|RSS_ATOM, HKEY("li")); - AddRSSEndHandler (RSSATOM_item_ignore, RSS_RSS|RSS_ATOM, HKEY("li")); + AddRSSStartHandler(RSSATOM_item_ignore, RSS_RSS|RSS_ATOM, HKEY("seq")); + AddRSSEndHandler (RSSATOM_item_ignore, RSS_RSS|RSS_ATOM, HKEY("seq")); + AddRSSStartHandler(RSSATOM_item_ignore, RSS_RSS|RSS_ATOM, HKEY("li")); + AddRSSEndHandler (RSSATOM_item_ignore, RSS_RSS|RSS_ATOM, HKEY("li")); /* links to other feed generators... */ - AddRSSStartHandler(RSSATOM_item_ignore, RSS_RSS|RSS_ATOM, HKEY("feedflare")); - AddRSSEndHandler (RSSATOM_item_ignore, RSS_RSS|RSS_ATOM, HKEY("feedflare")); - AddRSSStartHandler(RSSATOM_item_ignore, RSS_RSS|RSS_ATOM, HKEY("browserfriendly")); - AddRSSEndHandler (RSSATOM_item_ignore, RSS_RSS|RSS_ATOM, HKEY("browserfriendly")); - - KnownNameSpaces = NewHash(1, NULL); - Put(KnownNameSpaces, HKEY("http://a9.com/-/spec/opensearch/1.1/"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://a9.com/-/spec/opensearchrss/1.0/"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://backend.userland.com/creativeCommonsRssModule"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://purl.org/atom/ns#"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://purl.org/dc/elements/1.1/"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/modules/content/"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/modules/slash/"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/modules/syndication/"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://purl.org/syndication/thread/1.0"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://rssnamespace.org/feedburner/ext/1.0"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://schemas.google.com/g/2005"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://webns.net/mvcb/"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://web.resource.org/cc/"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://wellformedweb.org/CommentAPI/"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://www.georss.org/georss"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://www.w3.org/1999/xhtml"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://www.w3.org/1999/02/22-rdf-syntax-ns#"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://www.w3.org/1999/02/22-rdf-syntax-ns#"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://www.w3.org/2003/01/geo/wgs84_pos#"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://www.w3.org/2005/Atom"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("urn:flickr:"), NULL, reference_free_handler); + AddRSSStartHandler(RSSATOM_item_ignore, RSS_RSS|RSS_ATOM, HKEY("feedflare")); + AddRSSEndHandler (RSSATOM_item_ignore, RSS_RSS|RSS_ATOM, HKEY("feedflare")); + AddRSSStartHandler(RSSATOM_item_ignore, RSS_RSS|RSS_ATOM, HKEY("browserfriendly")); + AddRSSEndHandler (RSSATOM_item_ignore, RSS_RSS|RSS_ATOM, HKEY("browserfriendly")); + + KnownNameSpaces = NewHash(1, NULL); + Put(KnownNameSpaces, HKEY("http://a9.com/-/spec/opensearch/1.1/"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://a9.com/-/spec/opensearchrss/1.0/"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://backend.userland.com/creativeCommonsRssModule"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://purl.org/atom/ns#"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://purl.org/dc/elements/1.1/"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/modules/content/"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/modules/slash/"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/modules/syndication/"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://purl.org/syndication/thread/1.0"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://rssnamespace.org/feedburner/ext/1.0"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://schemas.google.com/g/2005"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://webns.net/mvcb/"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://web.resource.org/cc/"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://wellformedweb.org/CommentAPI/"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://www.georss.org/georss"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://www.w3.org/1999/xhtml"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://www.w3.org/1999/02/22-rdf-syntax-ns#"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://www.w3.org/1999/02/22-rdf-syntax-ns#"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://www.w3.org/2003/01/geo/wgs84_pos#"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://www.w3.org/2005/Atom"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("urn:flickr:"), NULL, reference_free_handler); #if 0 - /* we don't like these namespaces because of they shadow our usefull parameters. */ - Put(KnownNameSpaces, HKEY("http://search.yahoo.com/mrss/"), NULL, reference_free_handler); + /* we don't like these namespaces because of they shadow our usefull parameters. */ + Put(KnownNameSpaces, HKEY("http://search.yahoo.com/mrss/"), NULL, reference_free_handler); #endif + } return "rssclient"; } diff --git a/citadel/utillib/citadel_dirs.c b/citadel/utillib/citadel_dirs.c index ddaf26ede..8863b46bd 100644 --- a/citadel/utillib/citadel_dirs.c +++ b/citadel/utillib/citadel_dirs.c @@ -74,6 +74,7 @@ char ctdl_bin_dir[PATH_MAX]=""; char file_citadel_control[PATH_MAX]=""; char file_citadel_rc[PATH_MAX]=""; char file_citadel_config[PATH_MAX]=""; +char file_citadel_urlshorteners[PATH_MAX]=""; char file_lmtp_socket[PATH_MAX]=""; char file_lmtp_unfiltered_socket[PATH_MAX]=""; char file_arcq[PATH_MAX]=""; @@ -243,6 +244,11 @@ void calc_dirs_n_files(int relh, int home, const char *relhome, char *ctdldir, "%scitadel.rc", ctdl_etc_dir); StripSlashes(file_citadel_rc, 0); + snprintf(file_citadel_urlshorteners, + sizeof file_citadel_urlshorteners, + "%scitadel_urlshorteners.rc", + ctdl_etc_dir); + StripSlashes(file_citadel_urlshorteners, 0); snprintf(file_lmtp_socket, sizeof file_lmtp_socket, "%slmtp.socket", -- 2.30.2