]> code.citadel.org Git - citadel.git/blob - citadel/html.c
* Got the html.c mostly working.
[citadel.git] / citadel / html.c
1 /*
2  * html.c -- Functions which handle translation between HTML and plain text
3  * $Id$
4  */
5
6 #include <stdlib.h>
7 #include <unistd.h>
8 #include <stdio.h>
9 #include <ctype.h>
10 #include <string.h>
11
12
/*
 * Fixed capacities of the converter's working buffers.
 */
enum {
        H2A_LINE_MAX = 256,     /* bytes held for the pending output line */
        H2A_TAG_MAX = 1024,     /* bytes collected between '<' and '>' */
        H2A_ENTITY_MAX = 8,     /* longest known entity ("&nbsp;") + slack */
        H2A_NAME_MAX = 16       /* normalized tag name, e.g. "/TABLE" */
};

/*
 * The output line being assembled, and the column at which it is wrapped.
 * text[] is NOT NUL-terminated; only the first len bytes are meaningful.
 */
struct h2a_line {
        char text[H2A_LINE_MAX];
        size_t len;
        size_t width;
};

/*
 * Write the first `count` bytes of the pending line plus a newline to
 * stdout, then discard the first `drop` bytes of the pending line.
 */
static void h2a_flush(struct h2a_line *ln, size_t count, size_t drop)
{
        fwrite(ln->text, 1, count, stdout);
        fputc('\n', stdout);
        /* memmove, not strcpy/memcpy: source and destination overlap */
        memmove(ln->text, ln->text + drop, ln->len - drop);
        ln->len -= drop;
}

/*
 * Append one character of visible text.  '\n' is a hard line break and
 * flushes the pending line.  Any other character is buffered; once the
 * line grows past the wrap width it is broken at the last space inside
 * that width (the space is replaced by the newline), or cut at exactly
 * the wrap width when it contains no space.
 */
static void h2a_put(struct h2a_line *ln, int ch)
{
        size_t cut;

        if (ch == '\n') {
                h2a_flush(ln, ln->len, ln->len);
                return;
        }

        ln->text[ln->len++] = (char)ch;
        if (ln->len <= ln->width)
                return;

        for (cut = ln->width; cut > 0; --cut) {
                if (ln->text[cut - 1] == ' ') {
                        h2a_flush(ln, cut - 1, cut);
                        return;
                }
        }
        h2a_flush(ln, ln->width, ln->width);
}

/*
 * Append every character of a NUL-terminated string via h2a_put().
 */
static void h2a_puts(struct h2a_line *ln, const char *s)
{
        for (; *s != '\0'; ++s)
                h2a_put(ln, (unsigned char)*s);
}

/*
 * Return nonzero when the two strings are equal ignoring ASCII case.
 */
static int h2a_equal_nocase(const char *a, const char *b)
{
        while (*a != '\0' && *b != '\0') {
                if (toupper((unsigned char)*a) != toupper((unsigned char)*b))
                        return 0;
                ++a;
                ++b;
        }
        return *a == *b;
}

/*
 * Emit the text for a collected "&...;" sequence: the decoded character
 * when the sequence is a known entity (matched case-insensitively), or
 * the sequence itself, unchanged, when it is not.
 */
static void h2a_entity(struct h2a_line *ln, const char *ent)
{
        static const struct {
                const char *name;
                char ch;
        } known[] = {
                { "&nbsp;", ' ' },
                { "&lt;", '<' },
                { "&lb;", '<' },        /* non-standard, kept for compatibility */
                { "&gt;", '>' },
                { "&rb;", '>' },        /* non-standard, kept for compatibility */
                { "&amp;", '&' },
                { "&quot;", '"' }
        };
        size_t i;

        for (i = 0; i < sizeof known / sizeof known[0]; ++i) {
                if (h2a_equal_nocase(ent, known[i].name)) {
                        h2a_put(ln, known[i].ch);
                        return;
                }
        }
        h2a_puts(ln, ent);
}

/*
 * Act on a completed tag.  `tag` is the text found between '<' and '>'.
 * Only the element name is examined (optional leading '/', then letters
 * and digits, compared without regard to case), so attributes and a
 * trailing '/' as in "<br />" do not prevent a match.
 *
 *   P, H1-H4                     two line breaks
 *   /H1-/H4, BR, TR, /TABLE      one line break
 *   HR                           line break, then a rule on its own line
 *
 * Every other tag produces no output.
 */
static void h2a_tag(struct h2a_line *ln, const char *tag)
{
        static const char *const blank_line[] = {
                "P", "H1", "H2", "H3", "H4"
        };
        static const char *const new_line[] = {
                "/H1", "/H2", "/H3", "/H4", "BR", "TR", "/TABLE"
        };
        char name[H2A_NAME_MAX];
        size_t len = 0;
        size_t i;

        while (*tag == ' ')
                ++tag;
        if (*tag == '/')
                name[len++] = *tag++;
        while (isalnum((unsigned char)*tag) && len < sizeof name - 1)
                name[len++] = (char)toupper((unsigned char)*tag++);
        name[len] = '\0';

        if (strcmp(name, "HR") == 0) {
                /*
                 * The rule (one space + `width` dashes) is one column wider
                 * than the wrap width, so it is written straight to stdout
                 * instead of going through the wrapping buffer.
                 */
                h2a_put(ln, '\n');
                fputc(' ', stdout);
                for (i = 0; i < ln->width; ++i)
                        fputc('-', stdout);
                fputc('\n', stdout);
                return;
        }

        for (i = 0; i < sizeof blank_line / sizeof blank_line[0]; ++i) {
                if (strcmp(name, blank_line[i]) == 0) {
                        h2a_put(ln, '\n');
                        h2a_put(ln, '\n');
                        return;
                }
        }

        for (i = 0; i < sizeof new_line / sizeof new_line[0]; ++i) {
                if (strcmp(name, new_line[i]) == 0) {
                        h2a_put(ln, '\n');
                        return;
                }
        }
}

/*
 * Convert HTML to plain text.
 *
 * Reads HTML from stdin until end of file and writes word-wrapped plain
 * text to stdout.
 *
 * screenwidth: width of the display in columns.  Text is wrapped so that
 *              no line exceeds screenwidth-2 characters; that wrap width
 *              is clamped to the range 1 .. H2A_LINE_MAX-1.
 *
 * Processing rules:
 *  - LF, CR and TAB become spaces; all other bytes outside 32..126 are
 *    dropped; runs of spaces in the input collapse to a single space.
 *  - Everything between '<' and the matching '>' is removed.  A '<'
 *    inside a tag increases the nesting level.  Block-level tags are
 *    turned into line breaks (see h2a_tag()).
 *  - A '>' that does not close a tag is copied through as text.
 *  - &nbsp; &lt; &gt; &amp; &quot; (and the legacy &lb; &rb;) are decoded
 *    exactly once; unknown or unterminated entities are copied verbatim.
 *  - The output always ends with a newline.
 */
void html_to_ascii(int screenwidth) {
        struct h2a_line ln;
        char tag[H2A_TAG_MAX];
        char ent[H2A_ENTITY_MAX];
        size_t tag_len = 0;
        size_t ent_len = 0;     /* nonzero while collecting an "&...;" */
        int nest = 0;           /* Bracket nesting level */
        int prev_space = 0;     /* previous input character was a space */
        int ch;

        ln.len = 0;
        ln.width = (screenwidth < 3) ? 1 : (size_t)screenwidth - 2;
        if (ln.width > sizeof ln.text - 1)
                ln.width = sizeof ln.text - 1;

        /* FIX ... genericize this (input is hard-wired to stdin) */
        while ((ch = getchar()) != EOF) {

                /* Fold in all the spacing */
                if (ch == '\n' || ch == '\r' || ch == '\t')
                        ch = ' ';
                if (ch < 32 || ch > 126)
                        continue;
                if (ch == ' ') {
                        if (prev_space)
                                continue;
                        prev_space = 1;
                } else {
                        prev_space = 0;
                }

                /* Continue, or abandon, an entity that is being collected */
                if (ent_len > 0) {
                        if (ch == ';'
                            || (isalnum(ch) && ent_len < sizeof ent - 2)) {
                                ent[ent_len++] = (char)ch;
                                ent[ent_len] = '\0';
                                if (ch == ';') {
                                        h2a_entity(&ln, ent);
                                        ent_len = 0;
                                }
                                continue;
                        }
                        /* Not an entity after all: emit it as plain text,
                         * then handle the current character normally. */
                        h2a_puts(&ln, ent);
                        ent_len = 0;
                }

                if (ch == '<') {
                        ++nest;
                        tag_len = 0;
                } else if (ch == '>' && nest > 0) {
                        --nest;
                        tag[tag_len] = '\0';
                        h2a_tag(&ln, tag);
                        /* forget the tag so it cannot fire a second time */
                        tag_len = 0;
                } else if (nest > 0) {
                        if (tag_len < sizeof tag - 1)
                                tag[tag_len++] = (char)ch;
                } else if (ch == '&') {
                        ent[0] = '&';
                        ent[1] = '\0';
                        ent_len = 1;
                } else {
                        h2a_put(&ln, ch);
                }
        }

        /* Input ended in the middle of an entity: keep its text */
        if (ent_len > 0)
                h2a_puts(&ln, ent);

        /* Write whatever is left, terminated by a newline */
        h2a_put(&ln, '\n');
}
197
198
/*
 * Temporary test driver: runs the HTML-to-text converter with an
 * 80-column screen width and exits with status 0.
 */
int main() {
        const int columns = 80;

        html_to_ascii(columns);

        return 0;
}