From: Wilfried Goesgens Date: Mon, 21 Feb 2011 22:13:33 +0000 (+0100) Subject: fixing html2ascii X-Git-Tag: v8.01~360 X-Git-Url: https://code.citadel.org/?a=commitdiff_plain;h=d64007d417e8319498cfa50669ca649e2d475672;p=citadel.git fixing html2ascii - add possibility to scan 4 digit entities - don't scan over the end - add tests --- diff --git a/libcitadel/lib/html_to_ascii.c b/libcitadel/lib/html_to_ascii.c index b80272aec..9a898cb70 100644 --- a/libcitadel/lib/html_to_ascii.c +++ b/libcitadel/lib/html_to_ascii.c @@ -470,8 +470,12 @@ char *html_to_ascii(const char *inputmsg, int msglen, int screenwidth, int do_ci } /* two-digit decimal equivalents */ - else if ((!strncmp(&outbuf[i], "&#", 2)) - && (outbuf[i+4] == ';') ) { + else if (outbuf[i] == '&' && + outbuf[i + 1] == '#' && + isdigit(outbuf[i + 2]) && + isdigit(outbuf[i + 3]) && + (outbuf[i+4] == ';') ) + { scanch = 0; sscanf(&outbuf[i+2], "%02d", &scanch); outbuf[i] = scanch; @@ -479,14 +483,34 @@ char *html_to_ascii(const char *inputmsg, int msglen, int screenwidth, int do_ci } /* three-digit decimal equivalents */ - else if ((!strncmp(&outbuf[i], "&#", 2)) - && (outbuf[i+5] == ';') ) { + else if (outbuf[i] == '&' && + outbuf[i + 1] == '#' && + isdigit(outbuf[i + 2]) && + isdigit(outbuf[i + 3]) && + isdigit(outbuf[i + 4]) && + (outbuf[i + 5] == ';') ) + { scanch = 0; sscanf(&outbuf[i+2], "%03d", &scanch); outbuf[i] = scanch; strcpy(&outbuf[i+1], &outbuf[i+6]); } + /* four-digit decimal equivalents */ + else if (outbuf[i] == '&' && + outbuf[i + 1] == '#' && + isdigit(outbuf[i + 2]) && + isdigit(outbuf[i + 3]) && + isdigit(outbuf[i + 4]) && + isdigit(outbuf[i + 5]) && + (outbuf[i + 6] == ';') ) + { + scanch = 0; + sscanf(&outbuf[i+2], "%04d", &scanch); + outbuf[i] = scanch; + strcpy(&outbuf[i+1], &outbuf[i+7]); /* "&#dddd;" is 7 bytes: resume after the ';' at i+6 */ + } + } /* Make sure the output buffer is big enough */ diff --git a/libcitadel/tests/stringbuf_conversion.c b/libcitadel/tests/stringbuf_conversion.c index e28416242..a554c8261 100644 --- 
a/libcitadel/tests/stringbuf_conversion.c +++ b/libcitadel/tests/stringbuf_conversion.c @@ -30,6 +30,7 @@ int fromstdin = 0; int parse_email = 0; +int parse_html = 0; static void TestRevalidateStrBuf(StrBuf *Buf) { CU_ASSERT(strlen(ChrPtr(Buf)) == StrLength(Buf)); @@ -202,6 +203,29 @@ static void TestEncodeEmailSTDIN(void) } +static void TestHTML2ASCII_line(void) +{ + int fdin = 0;// STDIN + const char *Err; + StrBuf *Source; + char *Target; + + Source = NewStrBuf(); + + while (fdin == 0) { + + StrBufTCP_read_line(Source, &fdin, 0, &Err); + printf("the source:>%s<\n", ChrPtr(Source)); + Target = html_to_ascii(ChrPtr(Source), StrLength(Source), 80, 0); + + printf("the target:>%s<\n", Target); + FlushStrBuf(Source); + free(Target); + } + + FreeStrBuf(&Source); +} + static void AddStrBufSimlpeTests(void) { @@ -209,22 +233,25 @@ static void AddStrBufSimlpeTests(void) CU_pTest pTest = NULL; pGroup = CU_add_suite("TestStringBufConversions", NULL, NULL); - if (!parse_email) { + if (parse_email) { if (!fromstdin) { - pTest = CU_add_test(pGroup, "testRFC822Decode", TestRFC822Decode); - pTest = CU_add_test(pGroup, "testRFC822Decode1", TestRFC822Decode); - pTest = CU_add_test(pGroup, "testRFC822Decode2", TestRFC822Decode); - pTest = CU_add_test(pGroup, "testRFC822Decode3", TestRFC822Decode); + pTest = CU_add_test(pGroup, "TestParseEmailSTDIN", TestEncodeEmail); } else - pTest = CU_add_test(pGroup, "testRFC822DecodeSTDIN", TestRFC822DecodeStdin); + pTest = CU_add_test(pGroup, "TestParseEmailSTDIN", TestEncodeEmailSTDIN); + } + else if (parse_html) { + pTest = CU_add_test(pGroup, "TestParseHTMLSTDIN", TestHTML2ASCII_line); } else { if (!fromstdin) { - pTest = CU_add_test(pGroup, "TestParseEmailSTDIN", TestEncodeEmail); + pTest = CU_add_test(pGroup, "testRFC822Decode", TestRFC822Decode); + pTest = CU_add_test(pGroup, "testRFC822Decode1", TestRFC822Decode); + pTest = CU_add_test(pGroup, "testRFC822Decode2", TestRFC822Decode); + pTest = CU_add_test(pGroup, "testRFC822Decode3", 
TestRFC822Decode); } else - pTest = CU_add_test(pGroup, "TestParseEmailSTDIN", TestEncodeEmailSTDIN); + pTest = CU_add_test(pGroup, "testRFC822DecodeSTDIN", TestRFC822DecodeStdin); } } @@ -234,8 +261,11 @@ int main(int argc, char* argv[]) { int a; - while ((a = getopt(argc, argv, "@i")) != EOF) + while ((a = getopt(argc, argv, "@ih")) != EOF) switch (a) { + case 'h': + parse_html = 1; + break; case '@': parse_email = 1; break; diff --git a/libcitadel/tests/testdata/html/entitystrings.txt b/libcitadel/tests/testdata/html/entitystrings.txt new file mode 100644 index 000000000..f34ebbdd7 --- /dev/null +++ b/libcitadel/tests/testdata/html/entitystrings.txt @@ -0,0 +1,2 @@ +TRASH Messenger Bags – Hip Pack. +Abandon ‘Share The Road’