]> code.citadel.org Git - citadel.git/blob - citadel/mime_parser.c
More work on the MIME parser
[citadel.git] / citadel / mime_parser.c
1 /*
2  * mime_parser.c
3  *
4  * This is a really bad attempt at writing a parser to handle MIME-encoded
5  * messages.
6  *
7  * Copyright (c) 1998-1999 by Art Cancro
8  * This code is distributed under the terms of the GNU General Public License.
9  *
10  */
11
12 #include <stdlib.h>
13 #include <unistd.h>
14 #include <stdio.h>
15 #include <signal.h>
16 #include <sys/types.h>
17 #include <ctype.h>
18 #include <string.h>
19 #include "mime_parser.h"
20
21
22
/*
 * Pull the value of a "key=value" parameter out of a header string.
 *
 * target  receives the value as a NUL-terminated string, or "" when the
 *         key is not present.  It must be large enough for the value plus
 *         the terminator; a buffer as large as source is always enough.
 * source  header text to search (for example the body of a Content-type
 *         header).  It may be the same buffer as target.
 * key     parameter name, compared without regard to case.  The match is
 *         a plain substring match followed by '=', so "name" will also
 *         match inside "filename=".
 *
 * One leading double quote is skipped, and the value stops at the next
 * double quote; an unquoted value runs to the end of source.
 */
void extract_key(char *target, char *source, char *key) {
        size_t key_len = strlen(key);
        size_t source_len = strlen(source);
        size_t pos;

        for (pos = 0; pos < source_len; ++pos) {
                /*
                 * A successful strncasecmp() means key_len non-NUL bytes
                 * matched, so source[pos + key_len] is still inside the
                 * string (at worst it is the terminator).
                 */
                if ((strncasecmp(&source[pos], key, key_len) == 0)
                   && (source[pos + key_len] == '=')) {
                        const char *value = &source[pos + key_len + 1];
                        const char *closing_quote;
                        size_t value_len;

                        if (*value == '"') ++value;
                        closing_quote = strchr(value, '"');
                        value_len = (closing_quote != NULL)
                                ? (size_t)(closing_quote - value)
                                : strlen(value);

                        /* memmove: target may overlap (or be) source */
                        memmove(target, value, value_len);
                        target[value_len] = '\0';
                        return;
                        }
                }

        target[0] = '\0';
        }
39
40
41
/*
 * Utility function to "readline" from memory.
 *
 * Copies one line, starting at start, into buf.  A line ends at a
 * linefeed (10) or at a NUL byte; the terminator is not stored, and a
 * carriage return (13) left at the end of the stored text is stripped.
 * At most maxlen-1 characters are stored; the rest of an over-long line
 * is consumed and discarded.  buf is zero-filled first, so the result is
 * always NUL-terminated.  When maxlen is zero or negative, buf is not
 * touched at all.
 *
 * Returns a pointer to the byte just AFTER the terminator that ended the
 * line.  Note that this is also true when the terminator was the NUL at
 * the end of the data: in that case the returned pointer is one past the
 * NUL and must not be dereferenced (check ptr[-1] to tell the two cases
 * apart).
 */
char *memreadline(char *start, char *buf, int maxlen) {
        char *ptr = start;
        size_t capacity = (maxlen > 0) ? (size_t)maxlen : 0;
        size_t len = 0;         /* tracked here instead of calling strlen() */
        char ch;

        if (capacity > 0) memset(buf, 0, capacity);

        for (;;) {
                ch = *ptr++;
                if ((ch == '\n') || (ch == '\0')) {
                        if ((len > 0) && (buf[len - 1] == '\r'))
                                buf[len - 1] = '\0';
                        return ptr;
                        }
                /* keep one byte in reserve for the terminating NUL */
                if (len + 1 < capacity)
                        buf[len++] = ch;
                }
        }
67
/*
 * Given a message or message-part body and a length, hand the part up
 * the stack through CallBack.
 *
 * partnum       dotted part number of this part
 * part_start    first byte of the part body
 * length        number of bytes in the body
 * content_type  MIME type of the part
 * encoding      value of the Content-transfer-encoding header (may be
 *               empty or NULL)
 * name          "name" parameter of the Content-type header
 * filename      "filename" parameter of the Content-Disposition header
 * CallBack      receives (name, filename, partnum, content, type, length);
 *               a NULL callback is ignored
 *
 * TODO: transfer decoding is not implemented yet.  Every part is
 * delivered exactly as it appears in the message, whatever `encoding`
 * says.  (The previous code compared strlen(encoding) against a
 * placeholder constant, which had the same effect for every real header
 * but dereferenced a NULL encoding.)
 */
void mime_decode(char *partnum,
                char *part_start, size_t length,
                char *content_type, char *encoding,
                char *name, char *filename,
                void (*CallBack)
                        (char *cbname,
                        char *cbfilename,
                        char *cbpartnum,
                        void *cbcontent,
                        char *cbtype,
                        size_t cblength)
                ) {

        (void)encoding;         /* unused until decoding is implemented */

        if (CallBack == NULL) return;

        /* No decoding available: send the part as-is */
        CallBack(name, filename, partnum, part_start,
                content_type, length);
        }
95
/*
 * Break out the components of a (possibly multipart) message and pass
 * each leaf part to mime_decode().
 * (This function expects to be fed HEADERS + CONTENT)
 *
 * partnum        dotted part number assigned to this message or part;
 *                sub-parts are numbered partnum.1, partnum.2, ...
 * content_start  first byte of the headers
 * content_end    one past the last byte to consider.  NULL can be
 *                supplied; in this case, the message is considered to
 *                have ended when the parser encounters a 0x00 byte.
 * CallBack       handed through to mime_decode() for every leaf part
 *
 * The headers are scanned for Content-type (and its "name" parameter),
 * Content-Disposition ("filename"), Content-transfer-encoding and a
 * "boundary" parameter.  If a boundary was found the body is split on
 * "--boundary" lines and every part is parsed recursively; otherwise the
 * body is delivered as a single part.
 *
 * Nothing is delivered when the data ends inside the headers or right
 * after them, and a multipart body whose closing "--boundary--" line is
 * missing loses its final, unterminated part.
 */
void the_mime_parser(char *partnum,
                char *content_start, char *content_end,
                void (*CallBack)
                        (char *cbname,
                        char *cbfilename,
                        char *cbpartnum,
                        void *cbcontent,
                        char *cbtype,
                        size_t cblength)
                ) {

        char *ptr;
        char *part_start, *line_start;
        char *semicolon;
        char buf[256];
        char header[256];
        char boundary[256];
        char startary[sizeof boundary + 2];     /* "--" + boundary */
        char endary[sizeof boundary + 4];       /* "--" + boundary + "--" */
        char content_type[256];
        char encoding[256];
        char name[256];
        char filename[256];
        char nested_partnum[256];
        int part_seq = 0;
        int is_end_line;
        int written;
        size_t i;
        size_t buf_len;
        size_t length;

        ptr = content_start;
        memset(boundary, 0, sizeof boundary);
        memset(content_type, 0, sizeof content_type);
        memset(encoding, 0, sizeof encoding);
        memset(name, 0, sizeof name);
        memset(filename, 0, sizeof filename);

        /* Learn interesting things from the headers */
        header[0] = '\0';
        do {
                ptr = memreadline(ptr, buf, sizeof buf);

                /*
                 * Premature end of message.  The range check comes first
                 * so that nothing at or beyond content_end is read, and
                 * ptr[-1] is tested before *ptr because memreadline()
                 * steps past a terminating NUL.
                 */
                if ((content_end != NULL) && (ptr >= content_end)) return;
                if ((ptr[-1] == '\0') || (*ptr == '\0')) return;

                buf_len = strlen(buf);
                for (i = 0; i < buf_len; ++i)
                        if (isspace((unsigned char)buf[i])) buf[i] = ' ';

                /*
                 * A line that does not start with whitespace (this
                 * includes the blank line that ends the headers) means
                 * the header collected so far is complete.
                 */
                if (!isspace((unsigned char)buf[0])) {
                        if (!strncasecmp(header, "Content-type: ", 14)) {
                                strcpy(content_type, &header[14]);
                                extract_key(name, content_type, "name");
                                }
                        if (!strncasecmp(header, "Content-Disposition: ", 21)) {
                                extract_key(filename, header, "filename");
                                }
                        if (!strncasecmp(header,
                                "Content-transfer-encoding: ", 27))
                                        strcpy(encoding, &header[27]);
                        if (boundary[0] == '\0')
                                extract_key(boundary, header, "boundary");
                        header[0] = '\0';
                        }

                /* Append (continuation lines included); drop what won't fit */
                if ((strlen(header) + buf_len + 2) < sizeof(header))
                        strcat(header, buf);
                } while (buf_len > 0);

        /* Keep only the bare type, without its parameters */
        semicolon = strchr(content_type, ';');
        if (semicolon != NULL) *semicolon = '\0';

        /* If it's not a multipart message, then do something with it */
        if (boundary[0] == '\0') {
                part_start = ptr;
                length = 0;
                while (((content_end == NULL) || (ptr < content_end))
                      && (*ptr != '\0')) {
                        ++length;
                        ++ptr;
                        }
                mime_decode(partnum,
                                part_start, length,
                                content_type, encoding,
                                name, filename, CallBack);
                return;
                }

        /* It is a multipart message: recursively process each part */
        snprintf(startary, sizeof startary, "--%s", boundary);
        snprintf(endary, sizeof endary, "--%s--", boundary);
        part_start = NULL;
        for (;;) {
                line_start = ptr;
                ptr = memreadline(ptr, buf, sizeof buf);

                /*
                 * Look at the line BEFORE testing for end of data, so
                 * that a closing boundary on the very last line still
                 * delivers the part in front of it.
                 */
                is_end_line = (strcasecmp(buf, endary) == 0);
                if (is_end_line || (strcasecmp(buf, startary) == 0)) {
                        if (part_start != NULL) {
                                written = snprintf(nested_partnum,
                                                sizeof nested_partnum,
                                                "%s.%d",
                                                partnum, ++part_seq);
                                /*
                                 * A part number that no longer fits means
                                 * the nesting is absurdly deep; skip the
                                 * part instead of recursing further.
                                 */
                                if ((written > 0) && ((size_t)written
                                   < sizeof nested_partnum)) {
                                        the_mime_parser(nested_partnum,
                                                        part_start, line_start,
                                                        CallBack);
                                        }
                                }
                        part_start = ptr;
                        }
                if (is_end_line) break;

                /* premature end of message */
                if ((content_end != NULL) && (ptr >= content_end)) return;
                if ((ptr[-1] == '\0') || (*ptr == '\0')) return;
                }
        }
216
/*
 * Entry point for the MIME parser.
 * (This function expects to be fed HEADERS + CONTENT)
 *
 * Parsing is delegated to the_mime_parser(), with the message as a whole
 * given part number "1".
 *
 * Note: NULL can be supplied as content_end; in this case, the message is
 * considered to have ended when the parser encounters a 0x00 byte.
 */
void mime_parser(char *content_start, char *content_end,
                void (*CallBack)
                        (char *cbname,
                        char *cbfilename,
                        char *cbpartnum,
                        void *cbcontent,
                        char *cbtype,
                        size_t cblength)
                ) {

        char *top_level_partnum = "1";

        the_mime_parser(top_level_partnum,
                        content_start, content_end,
                        CallBack);
        }