]> code.citadel.org Git - citadel.git/blob - citadel/mime_parser.c
* Brought over the mime_parser from WebCit and began preliminary work
[citadel.git] / citadel / mime_parser.c
1 /*
2  * mime_parser.c
3  *
4  * This is a really bad attempt at writing a parser to handle MIME-encoded
5  * data, including multipart messages.  In the case of WebCit, the input data
6  * might be a form containing uploaded files.  In the Citadel server, the data
7  * is more likely to be an actual MIME-encoded message.
8  *
9  * Copyright (c) 1998-1999 by Art Cancro
10  * This code is distributed under the terms of the GNU General Public License.
11  *
12  */
13
14 #include <stdlib.h>
15 #include <unistd.h>
16 #include <stdio.h>
17 #include <signal.h>
18 #include <sys/types.h>
19 #include <ctype.h>
20 #include <string.h>
21 #include "mime_parser.h"
22
23
24
/*
 * Extract the value of a "key=value" parameter from a header field body.
 *
 * target - receives the NUL-terminated value, or "" when the key is absent.
 *          The value is never longer than `source`, so a buffer of
 *          strlen(source)+1 bytes is always sufficient.  `target` may be
 *          the same buffer as `source`.
 * source - header text to search, e.g.  form-data; name="a"; filename="b"
 * key    - parameter name, matched case-insensitively; it must be followed
 *          immediately by '='.  (Callers pass " name" with a leading space
 *          so that it does not match inside "filename".)
 *
 * A value that begins with a double quote extends to the closing quote.
 * An unquoted value extends to the next ';', the next double quote, or the
 * end of the string, whichever comes first.
 */
void extract_key(char *target, char *source, char *key) {
	size_t key_len = strlen(key);
	size_t src_len = strlen(source);
	size_t pos;
	size_t n;
	const char *value;
	int quoted;

	/* The '=' must lie inside the string, hence pos + key_len < src_len */
	for (pos = 0; pos + key_len < src_len; ++pos) {
		if ((source[pos + key_len] != '=')
		   || strncasecmp(&source[pos], key, key_len)) {
			continue;
		}

		value = &source[pos + key_len + 1];
		quoted = (*value == '"');
		if (quoted) ++value;

		for (n = 0; (value[n] != 0) && (value[n] != '"'); ++n) {
			if (!quoted && (value[n] == ';')) break;
		}

		/* memmove, not strcpy: target is allowed to alias source */
		memmove(target, value, n);
		target[n] = 0;
		return;
	}

	target[0] = 0;
}
41
42
43
/*
 * The very back end for the component handler
 * (This function expects to be fed CONTENT ONLY, no headers)
 *
 * content             - body of the component
 * length              - number of bytes in `content`; negative values are
 *                       reported to the callback as 0
 * content_type        - value of the component's Content-type header
 * content_disposition - value of its Content-disposition header ("" if none)
 * CallBack            - invoked once for a non-multipart component with the
 *                       "name" and "filename" disposition parameters, an
 *                       encoding string (always "" here), the body, the
 *                       content type and the body length
 *
 * A component whose type starts with "multipart" is handed back to
 * mime_parser() and is NOT itself reported to the callback; only the parts
 * inside it are.
 */
void do_something_with_it(char *content,
		int length,
		char *content_type,
		char *content_disposition,
		void (*CallBack)
			(char *cbname,
			char *cbfilename,
			char *cbencoding,
			void *cbcontent,
			char *cbtype,
			size_t cblength)
		) {
	char name[256];
	char filename[256];
	char disposition[256];

	/* Nested multipart gets recursively fed back into the parser */
	if (!strncasecmp(content_type, "multipart", 9)) {
		mime_parser(content, length, content_type, CallBack);
		return;
	}

	/*
	 * Work on a bounded copy of the disposition so that extract_key()
	 * can never be asked to put more than 255 characters into the
	 * 256-byte name/filename buffers.
	 */
	snprintf(disposition, sizeof disposition, "%s", content_disposition);
	extract_key(name, disposition, " name");
	extract_key(filename, disposition, "filename");

	/**** OTHERWISE, HERE'S WHERE WE HANDLE THE STUFF!! *****/

	CallBack(name, filename, "", content, content_type,
		(length > 0) ? (size_t) length : 0);

	/**** END OF STUFF-HANDLER ****/
}
78
79
/*
 * Take a part, figure out its length, and do something with it
 * (This function expects to be fed HEADERS+CONTENT)
 *
 * content               - start of the part; leading CR/LF characters are
 *                         skipped before the headers are read
 * part_length           - number of bytes available at `content`
 * supplied_content_type - content type to assume when the part carries no
 *                         Content-type header of its own
 * CallBack              - passed through to do_something_with_it()
 *
 * Header lines are read up to the first empty line.  If the data runs out
 * before that empty line has been consumed and at least one byte follows
 * it, the part is dropped and the callback is not invoked.  The newline
 * that separates the body from the next boundary (2 bytes if any header
 * line ended in CR LF, otherwise 1) is not counted in the length handed on.
 */
void handle_part(char *content,
		int part_length,
		char *supplied_content_type,
		void (*CallBack)
			(char *cbname,
			char *cbfilename,
			char *cbencoding,
			void *cbcontent,
			char *cbtype,
			size_t cblength)
		) {
	char content_type[256];
	char content_disposition[256];
	char buf[512];
	char *start = content;
	size_t len;
	int crlf = 0;	/* set to 1 for crlf-style newlines */
	int saw_newline;
	int ended_with_cr;
	int actual_length;
	char ch;

	snprintf(content_type, sizeof content_type, "%s",
		supplied_content_type);
	content_disposition[0] = 0;	/* a part may have no disposition */

	/* Strip off any leading blank lines. */
	while ((part_length > 0) && ((*start == '\r') || (*start == '\n'))) {
		++start;
		--part_length;
	}
	if (part_length <= 0) return;

	/* At this point all we have left is the headers and the content. */
	do {
		len = 0;
		saw_newline = 0;
		ended_with_cr = 0;

		/*
		 * Read one line.  Characters that do not fit in buf are
		 * discarded, but the scan still runs to the newline so an
		 * over-long header does not swallow the rest of the part.
		 */
		while ((part_length > 0) && !saw_newline) {
			ch = *start++;
			--part_length;
			if (ch == '\n') {
				saw_newline = 1;
			}
			else {
				ended_with_cr = (ch == '\r');
				if (len < (sizeof buf) - 1) buf[len++] = ch;
			}
		}
		buf[len] = 0;
		if (part_length <= 0) return;

		if (ended_with_cr) {
			crlf = 1;
			if ((len > 0) && (buf[len - 1] == '\r')) {
				buf[--len] = 0;
			}
		}

		if (!strncasecmp(buf, "Content-type: ", 14)) {
			snprintf(content_type, sizeof content_type,
				"%s", &buf[14]);
		}
		if (!strncasecmp(buf, "Content-disposition: ", 21)) {
			snprintf(content_disposition,
				sizeof content_disposition, "%s", &buf[21]);
		}
	} while (len > 0);

	/* Do not count the newline that precedes the next boundary */
	actual_length = part_length - (crlf ? 2 : 1);
	if (actual_length < 0) actual_length = 0;

	/* Now that we've got this component isolated, what to do with it? */
	do_something_with_it(start, actual_length,
			content_type, content_disposition, CallBack);
}
144
145         
/*
 * Return nonzero when at least `marker_len` bytes remain at `p` and they
 * match `marker`, compared case-insensitively.
 */
static int mime_at_marker(const char *p, int remaining,
		const char *marker, size_t marker_len) {
	if ((remaining < 0) || ((size_t) remaining < marker_len)) return 0;
	return (strncasecmp(p, marker, marker_len) == 0);
}


/*
 * Break out the components of a multipart message
 * (This function expects to be fed CONTENT ONLY, no headers)
 *
 * content       - message body
 * ContentLength - number of bytes in `content`; nothing beyond it is read
 * ContentType   - value of the Content-type header, including the
 *                 boundary= parameter for multipart types
 * CallBack      - receives each component (see do_something_with_it())
 *
 * Content that is not multipart is passed straight to
 * do_something_with_it().  Multipart content with no usable boundary
 * parameter is ignored.  Otherwise every part that follows a "--boundary"
 * line is handed to handle_part(), and parsing stops at "--boundary--".
 */
void mime_parser(char *content,
		int ContentLength,
		char *ContentType,
		void (*CallBack)
			(char *cbname,
			char *cbfilename,
			char *cbencoding,
			void *cbcontent,
			char *cbtype,
			size_t cblength)
		) {
	char boundary[256];
	char endary[256];
	const char *value = NULL;
	size_t type_len;
	size_t boundary_len;
	size_t endary_len;
	size_t max_value;
	size_t i;
	size_t n;
	int quoted;
	char *ptr;
	char *beginning;
	int bytes_processed = 0;
	int part_length;

	/* If it's not multipart, don't process it as multipart */
	if (strncasecmp(ContentType, "multipart", 9)) {
		do_something_with_it(content, ContentLength,
				ContentType, "", CallBack);
		return;
	}

	/* Figure out what the boundary is */
	type_len = strlen(ContentType);
	for (i = 0; i + 9 <= type_len; ++i) {
		if (!strncasecmp(&ContentType[i], "boundary=", 9)) {
			value = &ContentType[i + 9];
			break;
		}
	}

	/* We can't process multipart messages without a boundary. */
	if (value == NULL) return;

	quoted = (*value == '"');
	if (quoted) ++value;

	/* Leave room for the "--" prefix, endary's "--" suffix and the NUL */
	max_value = (sizeof boundary) - 5;
	for (n = 0; (n < max_value) && (value[n] != 0); ++n) {
		if ((value[n] == '\r') || (value[n] == '\n')) break;
		if (quoted ? (value[n] == '"') : (value[n] == ';')) break;
	}
	if (n == 0) return;	/* an empty boundary would match any "--" */

	snprintf(boundary, sizeof boundary, "--%.*s", (int) n, value);
	snprintf(endary, sizeof endary, "%s--", boundary);
	boundary_len = strlen(boundary);
	endary_len = strlen(endary);
	fprintf(stderr, "BOUNDARY: %s\n", boundary);

	ptr = content;

	while (bytes_processed < ContentLength) {

		/* Seek to the beginning of the next boundary */
		if (!mime_at_marker(ptr, ContentLength - bytes_processed,
		   boundary, boundary_len)) {
			++ptr;
			++bytes_processed;
			continue;
		}

		/* See if we're at the end */
		if (mime_at_marker(ptr, ContentLength - bytes_processed,
		   endary, endary_len)) {
			return;
		}

		/* Seek to the end of the boundary string */
		fprintf(stderr, "FOUNDA BOUNDA\n");
		while ((bytes_processed < ContentLength) && (*ptr != '\n')) {
			++ptr;
			++bytes_processed;
		}

		/* The part runs from here to the next boundary (or the end) */
		beginning = ptr;
		part_length = 0;
		while ((bytes_processed < ContentLength)
		   && !mime_at_marker(ptr, ContentLength - bytes_processed,
		      boundary, boundary_len)) {
			++ptr;
			++bytes_processed;
			++part_length;
		}
		handle_part(beginning, part_length, "", CallBack);

		/* ptr now rests on the next boundary; the loop picks it up */
	}
}