From 0149ea4b722c795dfdb9918d2281e859cf064ebe Mon Sep 17 00:00:00 2001 From: Wilfried Goesgens Date: Sat, 25 Jun 2011 07:12:46 +0000 Subject: [PATCH] =?utf8?q?more=20work=20on=20libev=B4ing=20the=20rss-reade?= =?utf8?q?r.?= MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit --- citadel/event_client.h | 4 + citadel/modules/rssclient/serv_rssclient.c | 251 ++++++++++-------- .../urldeshortener/serv_expand_shorter_urls.c | 113 ++++---- .../urldeshortener/serv_expand_shorter_urls.h | 4 +- 4 files changed, 211 insertions(+), 161 deletions(-) diff --git a/citadel/event_client.h b/citadel/event_client.h index a3d467db0..23fdf68db 100644 --- a/citadel/event_client.h +++ b/citadel/event_client.h @@ -92,6 +92,10 @@ struct AsyncIO { evcurl_request_data HttpReq; + /* Saving / loading a message async from / to disk */ + + struct CtdlMessage *AsyncMsg; + struct recptypes AsyncRcp; /* Custom data; its expected to contain AsyncIO so we can save malloc()s... */ void *Data; /* application specific data */ void *CitContext; /* Citadel Session context... */ diff --git a/citadel/modules/rssclient/serv_rssclient.c b/citadel/modules/rssclient/serv_rssclient.c index 13a887e0f..a01a92f1c 100644 --- a/citadel/modules/rssclient/serv_rssclient.c +++ b/citadel/modules/rssclient/serv_rssclient.c @@ -58,6 +58,11 @@ #include "rss_atom_parser.h" +#define TMP_MSGDATA 0xFF +#define TMP_SHORTER_URL_OFFSET 0xFE +#define TMP_SHORTER_URLS 0xFD + + struct rssnetcfg *rnclist = NULL; void AppendLink(StrBuf *Message, StrBuf *link, StrBuf *LinkTitle, const char *Title) { @@ -75,6 +80,25 @@ void AppendLink(StrBuf *Message, StrBuf *link, StrBuf *LinkTitle, const char *Ti StrBufAppendBufPlain(Message, HKEY("
\n"), 0); } } + +void RSSSaveMessage(struct CtdlMessage *Msg, rss_item *ri, struct UseTable *ut) +{ + + CtdlSubmitMsg(msg, recp, NULL, 0); + CtdlFreeMessage(msg); + + /* write the uidl to the use table so we don't store this item again */ + cdb_store(CDB_USETABLE, utmsgid, strlen(utmsgid), &ut, sizeof(struct UseTable) ); + free(ut); +} + + +rss_save_msg(msg, recp) +{ + + +} + /* * Commit a fetched and parsed RSS item to disk */ @@ -91,6 +115,7 @@ void rss_save_item(rss_item *ri) struct recptypes *recp = NULL; int msglen = 0; StrBuf *Message; + AsyncIO *OtherIO; recp = (struct recptypes *) malloc(sizeof(struct recptypes)); if (recp == NULL) return; @@ -125,138 +150,150 @@ void rss_save_item(rss_item *ri) strcat(utmsgid, "_rss2ctdl"); } - /* Find out if we've already seen this item */ + /* translate Item into message. */ + CtdlLogPrintf(CTDL_DEBUG, "RSS: translating item...\n"); + if (ri->description == NULL) ri->description = NewStrBufPlain(HKEY("")); + StrBufSpaceToBlank(ri->description); + msg = malloc(sizeof(struct CtdlMessage)); + memset(msg, 0, sizeof(struct CtdlMessage)); + msg->cm_magic = CTDLMESSAGE_MAGIC; + msg->cm_anon_type = MES_NORMAL; + msg->cm_format_type = FMT_RFC822; - cdbut = cdb_fetch(CDB_USETABLE, utmsgid, strlen(utmsgid)); -#ifndef DEBUG_RSS - if (cdbut != NULL) { - /* Item has already been seen */ - CtdlLogPrintf(CTDL_DEBUG, "%s has already been seen\n", utmsgid); - cdb_free(cdbut); - - /* rewrite the record anyway, to update the timestamp */ - strcpy(ut.ut_msgid, utmsgid); - ut.ut_timestamp = time(NULL); - cdb_store(CDB_USETABLE, utmsgid, strlen(utmsgid), &ut, sizeof(struct UseTable) ); + if (ri->guid != NULL) { + msg->cm_fields['E'] = strdup(ChrPtr(ri->guid)); } - else -#endif -{ - /* Item has not been seen, so save it. */ - CtdlLogPrintf(CTDL_DEBUG, "RSS: saving item...\n"); - if (ri->description == NULL) ri->description = NewStrBufPlain(HKEY("")); - StrBufSpaceToBlank(ri->description); - msg = malloc(sizeof(struct CtdlMessage)); - memset(msg, 0, sizeof(struct CtdlMessage)); - msg->cm_magic = CTDLMESSAGE_MAGIC; - msg->cm_anon_type = MES_NORMAL; - msg->cm_format_type = FMT_RFC822; - - if (ri->guid != NULL) { - msg->cm_fields['E'] = strdup(ChrPtr(ri->guid)); - } - if (ri->author_or_creator != NULL) { - char *From; - StrBuf *Encoded = NULL; - int FromAt; + if (ri->author_or_creator != NULL) { + char *From; + StrBuf *Encoded = NULL; + int FromAt; - From = html_to_ascii(ChrPtr(ri->author_or_creator), - StrLength(ri->author_or_creator), - 512, 0); - StrBufPlain(ri->author_or_creator, From, -1); - StrBufTrim(ri->author_or_creator); - free(From); - - FromAt = strchr(ChrPtr(ri->author_or_creator), '@') != NULL; - if (!FromAt && StrLength (ri->author_email) > 0) + From = html_to_ascii(ChrPtr(ri->author_or_creator), + StrLength(ri->author_or_creator), + 512, 0); + StrBufPlain(ri->author_or_creator, From, -1); + StrBufTrim(ri->author_or_creator); + free(From); + + FromAt = strchr(ChrPtr(ri->author_or_creator), '@') != NULL; + if (!FromAt && StrLength (ri->author_email) > 0) + { + StrBufRFC2047encode(&Encoded, ri->author_or_creator); + msg->cm_fields['A'] = SmashStrBuf(&Encoded); + msg->cm_fields['P'] = SmashStrBuf(&ri->author_email); + } + else + { + if (FromAt) + msg->cm_fields['P'] = SmashStrBuf(&ri->author_or_creator); + else { StrBufRFC2047encode(&Encoded, ri->author_or_creator); msg->cm_fields['A'] = SmashStrBuf(&Encoded); - msg->cm_fields['P'] = SmashStrBuf(&ri->author_email); - } - else - { - if (FromAt) - msg->cm_fields['P'] = SmashStrBuf(&ri->author_or_creator); - else - { - StrBufRFC2047encode(&Encoded, ri->author_or_creator); - msg->cm_fields['A'] = SmashStrBuf(&Encoded); - msg->cm_fields['P'] = strdup("rss@localhost"); - } + msg->cm_fields['P'] = strdup("rss@localhost"); } } - else { - msg->cm_fields['A'] = strdup("rss"); - } + } + else { + msg->cm_fields['A'] = strdup("rss"); + } - msg->cm_fields['N'] = strdup(NODENAME); - if (ri->title != NULL) { - long len; - char *Sbj; - StrBuf *Encoded, *QPEncoded; - - QPEncoded = NULL; - StrBufSpaceToBlank(ri->title); - len = StrLength(ri->title); - Sbj = html_to_ascii(ChrPtr(ri->title), len, 512, 0); - len = strlen(Sbj); - if (Sbj[len - 1] == '\n') - { - len --; - Sbj[len] = '\0'; - } - Encoded = NewStrBufPlain(Sbj, len); - free(Sbj); + msg->cm_fields['N'] = strdup(NODENAME); + if (ri->title != NULL) { + long len; + char *Sbj; + StrBuf *Encoded, *QPEncoded; + + QPEncoded = NULL; + StrBufSpaceToBlank(ri->title); + len = StrLength(ri->title); + Sbj = html_to_ascii(ChrPtr(ri->title), len, 512, 0); + len = strlen(Sbj); + if (Sbj[len - 1] == '\n') + { + len --; + Sbj[len] = '\0'; + } + Encoded = NewStrBufPlain(Sbj, len); + free(Sbj); - StrBufTrim(Encoded); - StrBufRFC2047encode(&QPEncoded, Encoded); + StrBufTrim(Encoded); + StrBufRFC2047encode(&QPEncoded, Encoded); - msg->cm_fields['U'] = SmashStrBuf(&QPEncoded); - FreeStrBuf(&Encoded); - } - msg->cm_fields['T'] = malloc(64); - snprintf(msg->cm_fields['T'], 64, "%ld", ri->pubdate); - if (ri->channel_title != NULL) { - if (StrLength(ri->channel_title) > 0) { - msg->cm_fields['O'] = strdup(ChrPtr(ri->channel_title)); - } + msg->cm_fields['U'] = SmashStrBuf(&QPEncoded); + FreeStrBuf(&Encoded); + } + msg->cm_fields['T'] = malloc(64); + snprintf(msg->cm_fields['T'], 64, "%ld", ri->pubdate); + if (ri->channel_title != NULL) { + if (StrLength(ri->channel_title) > 0) { + msg->cm_fields['O'] = strdup(ChrPtr(ri->channel_title)); } - if (ri->link == NULL) - ri->link = NewStrBufPlain(HKEY("")); - // TODO: reenable me ExpandShortUrls(ri->description); - msglen += 1024 + StrLength(ri->link) + StrLength(ri->description) ; + } + if (ri->link == NULL) + ri->link = NewStrBufPlain(HKEY("")); - Message = NewStrBufPlain(NULL, StrLength(ri->description)); - StrBufPlain(Message, HKEY( - "Content-type: text/html; charset=\"UTF-8\"\r\n\r\n" - "\n")); + msg->cm_fields[TMP_SHORTER_URLS] = GetShorterUrls(ri->description); - StrBufAppendBuf(Message, ri->description, 0); - StrBufAppendBufPlain(Message, HKEY("

\n"), 0); + strcpy(ut->ut_msgid, utmsgid); + ut->ut_timestamp = time(NULL); - AppendLink(Message, ri->link, ri->linkTitle, NULL); - AppendLink(Message, ri->reLink, ri->reLinkTitle, "Reply to this"); - StrBufAppendBufPlain(Message, HKEY("\n"), 0); + msglen += 1024 + StrLength(ri->link) + StrLength(ri->description) ; - msg->cm_fields['M'] = SmashStrBuf(&Message); + Message = NewStrBufPlain(NULL, StrLength(ri->description)); - CtdlSubmitMsg(msg, recp, NULL, 0); - CtdlFreeMessage(msg); + StrBufPlain(Message, HKEY( + "Content-type: text/html; charset=\"UTF-8\"\r\n\r\n" + "\n")); + msg->cm_fields[TMP_SHORTER_URL_OFFSET] = StrLength(Message); + StrBufAppendBuf(Message, ri->description, 0); + StrBufAppendBufPlain(Message, HKEY("

\n"), 0); - /* write the uidl to the use table so we don't store this item again */ - strcpy(ut.ut_msgid, utmsgid); - ut.ut_timestamp = time(NULL); - cdb_store(CDB_USETABLE, utmsgid, strlen(utmsgid), &ut, sizeof(struct UseTable) ); - } - free_recipients(recp); + AppendLink(Message, ri->link, ri->linkTitle, NULL); + AppendLink(Message, ri->reLink, ri->reLinkTitle, "Reply to this"); + StrBufAppendBufPlain(Message, HKEY("\n"), 0); + + + msg->cm_fields[TMP_MSGDATA] = Message; + + + OtherIO = malloc(sizeof(AsyncIO)); + memset(OtherIO, 0, sizeof(AsyncIO)); + OtherIO->AsyncMsg = msg; + OtherIO->AsyncRcp = recp; + + rss_save_msg(msg, recp); +// msg->cm_fields['M'] = SmashStrBuf(&Message); + + // TODO: reenable me ExpandShortUrls(ri->description); + +/// free_recipients(recp); } + /* Find out if we've already seen this item * / + + cdbut = cdb_fetch(CDB_USETABLE, utmsgid, strlen(utmsgid)); +#ifndef DEBUG_RSS + if (cdbut != NULL) { + /* Item has already been seen * / + CtdlLogPrintf(CTDL_DEBUG, "%s has already been seen\n", utmsgid); + cdb_free(cdbut); + + /* rewrite the record anyway, to update the timestamp * / + strcpy(ut.ut_msgid, utmsgid); + ut.ut_timestamp = time(NULL); + cdb_store(CDB_USETABLE, utmsgid, strlen(utmsgid), &ut, sizeof(struct UseTable) ); + } + else +#endif + { +*/ + /* * Begin a feed parse diff --git a/citadel/modules/urldeshortener/serv_expand_shorter_urls.c b/citadel/modules/urldeshortener/serv_expand_shorter_urls.c index f7dac2a7a..ea37d4034 100644 --- a/citadel/modules/urldeshortener/serv_expand_shorter_urls.c +++ b/citadel/modules/urldeshortener/serv_expand_shorter_urls.c @@ -191,77 +191,84 @@ int SortConstStrByPosition(const void *Item1, const void *Item2) return -1; } -void ExpandShortUrls(StrBuf *Message) +HashList GetShorterUrls(StrBuf Message) { - StrBuf *Shadow; HashList *pUrls; - ConstStr *pCUrl; - const char *pch; - const char *pche; - /* we just suspect URL shorteners to be inside of feeds from twitter * or other short content messages, so don't crawl through real blogs. */ if (StrLength(Message) > 500) - return; + return NULL; pUrls = NewHash(1, Flathash); CrawlMessageForShorterUrls(pUrls, Message); if (GetCount(pUrls) > 0) - { - StrBuf *ShorterUrlStr; - HashPos *Pos; - const char *Key; - void *pv; - long len; + return pURLs; + else + return NULL; - Shadow = NewStrBufPlain(NULL, StrLength(Message)); - SortByPayload (pUrls, SortConstStrByPosition); +} - ShorterUrlStr = NewStrBufPlain(NULL, StrLength(Message)); +void ExpandShortUrls(StrBuf *Message, HashList *pUrls, int Callback) +{ + StrBuf *Shadow; + ConstStr *pCUrl; + const char *pch; + const char *pche; - pch = ChrPtr(Message); - pche = pch + StrLength(Message); - Pos = GetNewHashPos(pUrls, 1); - while (GetNextHashPos(pUrls, Pos, &len, &Key, &pv)) - { - pCUrl = (ConstStr*) pv; + StrBuf *ShorterUrlStr; + HashPos *Pos; + const char *Key; + void *pv; + long len; + + Shadow = NewStrBufPlain(NULL, StrLength(Message)); + SortByPayload (pUrls, SortConstStrByPosition); + + ShorterUrlStr = NewStrBufPlain(NULL, StrLength(Message)); + + pch = ChrPtr(Message); + pche = pch + StrLength(Message); + Pos = GetNewHashPos(pUrls, 1); + while (GetNextHashPos(pUrls, Pos, &len, &Key, &pv)) + { + pCUrl = (ConstStr*) pv; - if (pch != pCUrl->Key) - StrBufAppendBufPlain(Shadow, pch, pCUrl->Key - pch, 0); + if (pch != pCUrl->Key) + StrBufAppendBufPlain(Shadow, pch, pCUrl->Key - pch, 0); - StrBufPlain(ShorterUrlStr, CKEY(*pCUrl)); - if (LookupUrl(ShorterUrlStr)) - { - StrBufAppendBufPlain(Shadow, HKEY(""), 0); - StrBufAppendBuf(Shadow, ShorterUrlStr, 0); - StrBufAppendBufPlain(Shadow, HKEY("["), 0); - StrBufAppendBufPlain(Shadow, pCUrl->Key, pCUrl->len, 0); - StrBufAppendBufPlain(Shadow, HKEY("]"), 0); - } - else - { - StrBufAppendBufPlain(Shadow, HKEY("Key, pCUrl->len, 0); - StrBufAppendBufPlain(Shadow, HKEY("\">"), 0); - StrBufAppendBufPlain(Shadow, pCUrl->Key, pCUrl->len, 0); - StrBufAppendBufPlain(Shadow, HKEY(""), 0); - } - pch = pCUrl->Key + pCUrl->len + 1; - + StrBufPlain(ShorterUrlStr, CKEY(*pCUrl)); + if (LookupUrl(ShorterUrlStr)) + { + StrBufAppendBufPlain(Shadow, HKEY(""), 0); + StrBufAppendBuf(Shadow, ShorterUrlStr, 0); + StrBufAppendBufPlain(Shadow, HKEY("["), 0); + StrBufAppendBufPlain(Shadow, pCUrl->Key, pCUrl->len, 0); + StrBufAppendBufPlain(Shadow, HKEY("]"), 0); } - if (pch < pche) - StrBufAppendBufPlain(Shadow, pch, pche - pch, 0); - FlushStrBuf(Message); - StrBufAppendBuf(Message, Shadow, 0); - - FreeStrBuf(&ShorterUrlStr); - FreeStrBuf(&Shadow); - DeleteHashPos(&Pos); + else + { + StrBufAppendBufPlain(Shadow, HKEY("Key, pCUrl->len, 0); + StrBufAppendBufPlain(Shadow, HKEY("\">"), 0); + StrBufAppendBufPlain(Shadow, pCUrl->Key, pCUrl->len, 0); + StrBufAppendBufPlain(Shadow, HKEY(""), 0); + } + pch = pCUrl->Key + pCUrl->len + 1; + } + if (pch < pche) + StrBufAppendBufPlain(Shadow, pch, pche - pch, 0); + FlushStrBuf(Message); + StrBufAppendBuf(Message, Shadow, 0); + + FreeStrBuf(&ShorterUrlStr); + FreeStrBuf(&Shadow); + DeleteHashPos(&Pos); + DeleteHash(&pUrls); } diff --git a/citadel/modules/urldeshortener/serv_expand_shorter_urls.h b/citadel/modules/urldeshortener/serv_expand_shorter_urls.h index 477f6b5a3..4efcc26d0 100644 --- a/citadel/modules/urldeshortener/serv_expand_shorter_urls.h +++ b/citadel/modules/urldeshortener/serv_expand_shorter_urls.h @@ -1 +1,3 @@ -void ExpandShortUrls(StrBuf *Message, Callback) +void ExpandShortUrls(StrBuf *Message, Callback); + +HashList GetShorterUrls(StrBuf Message); -- 2.30.2