]> code.citadel.org Git - citadel.git/blob - webcit/mime_parser.c
* Modified the back end of mime_parser to use callbacks instead of
[citadel.git] / webcit / mime_parser.c
1 /*
2  * mime_parser.c
3  *
4  * This is a really bad attempt at writing a parser to handle multipart
5  * messages -- in the case of WebCit, a form containing uploaded files.
6  */
7
8 #include <stdlib.h>
9 #include <unistd.h>
10 #include <stdio.h>
11 #include <signal.h>
12 #include <sys/types.h>
13 #include <ctype.h>
14 #include <string.h>
15 #include "mime_parser.h"
16 #include "webcit.h"
17 #include "child.h"
18
19
20
/*
 * Extract the value of a "key=value" parameter from a header line.
 *
 * target - receives the NUL-terminated value, or "" if the key is absent.
 *          The caller must supply a buffer at least strlen(source)+1 bytes
 *          long.  target may be the same buffer as source.
 * source - the header text to search, e.g. the body of a
 *          Content-disposition header
 * key    - parameter name, compared case-insensitively; it must be
 *          immediately followed by '=' in source to count as a match
 *
 * Only the first match is used.  One leading double quote is dropped and
 * the value ends at the next double quote, or at the end of the string
 * when there is none.
 */
void extract_key(char *target, char *source, char *key) {
        size_t key_len = strlen(key);
        size_t source_len = strlen(source);
        size_t pos;

        for (pos = 0; pos < source_len; ++pos) {
                /*
                 * A successful compare guarantees that key_len non-NUL
                 * bytes exist at pos, so source[pos + key_len] is in bounds.
                 */
                if ((!strncasecmp(&source[pos], key, key_len))
                   && (source[pos + key_len] == '=')) {
                        const char *value = &source[pos + key_len + 1];
                        const char *closing_quote;
                        size_t value_len;

                        if (*value == '"') ++value;
                        closing_quote = strchr(value, '"');
                        if (closing_quote != NULL) {
                                value_len = (size_t)(closing_quote - value);
                        } else {
                                value_len = strlen(value);
                        }

                        /* memmove, not strcpy: target may overlap source */
                        memmove(target, value, value_len);
                        target[value_len] = 0;
                        return;
                }
        }

        target[0] = 0;
}
37
38
39
/*
 * The very back end for the component handler
 * (This function expects to be fed CONTENT ONLY, no headers)
 *
 * content             - first byte of the component's body
 * length              - number of body bytes; negative values are treated
 *                       as zero
 * content_type        - the component's Content-type value
 * content_disposition - the component's Content-disposition value, from
 *                       which the "name" and "filename" parameters are taken
 * CallBack            - invoked once for a non-multipart component with
 *                       (name, filename, "", content, length)
 *
 * A component whose type starts with "multipart" is not delivered itself;
 * it is fed back into mime_parser() so that only its leaf parts reach
 * CallBack.
 */
void do_something_with_it(char *content,
                int length,
                char *content_type,
                char *content_disposition,
                void (*CallBack)
                        (char *cbname,
                        char *cbfilename,
                        char *cbencoding,
                        void *cbcontent,
                        size_t cblength)
                ) {
        char disposition[256];
        char name[256];
        char filename[256];

        /* Nested multipart gets recursively fed back into the parser */
        if (!strncasecmp(content_type, "multipart", 9)) {
                mime_parser(content, length, content_type, CallBack);
                return;
        }

        /*
         * Work from a bounded copy: extract_key() has no size argument, so
         * the text it is given must already fit the 256-byte outputs.
         */
        snprintf(disposition, sizeof disposition, "%s", content_disposition);
        extract_key(name, disposition, " name");
        extract_key(filename, disposition, "filename");

        /* A negative int would turn into a huge size_t in the callback */
        if (length < 0) length = 0;

        CallBack(name, filename, "", content, (size_t)length);
}
73
74
/*
 * Take a part, figure out its length, and do something with it
 * (This function expects to be fed HEADERS+CONTENT)
 *
 * content               - first byte of the part; leading CR/LF bytes are
 *                         skipped
 * part_length           - number of bytes in the part
 * supplied_content_type - content type used when the part has no
 *                         Content-type header of its own
 * CallBack              - passed through to do_something_with_it()
 *
 * Header lines are read up to and including the first empty line.  What
 * follows is handed to do_something_with_it(), minus the line terminator
 * that closes the part (two bytes if any header line ended in CRLF, one
 * byte otherwise).  If the data runs out before the headers are finished,
 * the part is dropped and nothing is delivered.
 */
void handle_part(char *content,
                int part_length,
                char *supplied_content_type,
                void (*CallBack)
                        (char *cbname,
                        char *cbfilename,
                        char *cbencoding,
                        void *cbcontent,
                        size_t cblength)
                ) {
        char content_type[256];
        char content_disposition[256];
        char buf[512];
        char *start = content;
        size_t linelen;
        int saw_newline;
        int crlf = 0;   /* set to 1 for crlf-style newlines */
        int actual_length;

        snprintf(content_type, sizeof content_type, "%s",
                        supplied_content_type);
        /* Parts without a Content-disposition header get an empty one */
        content_disposition[0] = 0;

        /* Strip off any leading blank lines, staying inside the part. */
        while ((part_length > 0)
              && ((*start == '\r') || (*start == '\n'))) {
                ++start;
                --part_length;
        }

        /* At this point all we have left is the headers and the content. */
        do {
                linelen = 0;
                saw_newline = 0;

                /*
                 * Collect one line.  Bytes beyond the capacity of buf are
                 * consumed but not stored, so an over-long header is
                 * truncated instead of overrunning the buffer.
                 */
                while ((part_length > 0) && !saw_newline) {
                        char ch = *start++;

                        --part_length;
                        if (ch == '\n') {
                                saw_newline = 1;
                        } else if (linelen < (sizeof buf) - 1) {
                                buf[linelen++] = ch;
                        }
                }

                /* Ran out of data while still in the headers */
                if (part_length <= 0) return;

                if ((linelen > 0) && (buf[linelen - 1] == '\r')) {
                        --linelen;
                        crlf = 1;
                }
                buf[linelen] = 0;

                if (!strncasecmp(buf, "Content-type: ", 14)) {
                        snprintf(content_type, sizeof content_type,
                                        "%s", &buf[14]);
                }
                if (!strncasecmp(buf, "Content-disposition: ", 21)) {
                        snprintf(content_disposition,
                                        sizeof content_disposition,
                                        "%s", &buf[21]);
                }
        } while (linelen > 0);

        /* Leave out the newline that separates content from the boundary */
        actual_length = part_length - (crlf ? 2 : 1);
        if (actual_length < 0) actual_length = 0;

        /* Now that we've got this component isolated, what to do with it? */
        do_something_with_it(start, actual_length,
                        content_type, content_disposition, CallBack);
}
138
139         
/*
 * Report whether the marker string starts at ptr, comparing
 * case-insensitively and never looking past the remaining bytes.
 */
static int mime_at_marker(const char *ptr, int remaining,
                const char *marker, size_t marker_len) {
        if ((remaining < 0) || ((size_t)remaining < marker_len)) return 0;
        return (strncasecmp(ptr, marker, marker_len) == 0);
}


/*
 * Break out the components of a multipart message
 * (This function expects to be fed CONTENT ONLY, no headers)
 *
 * content       - the message body
 * ContentLength - number of bytes in content
 * ContentType   - the Content-type value; for multipart data it must carry
 *                 a "boundary=" parameter (bare or double-quoted)
 * CallBack      - receives each leaf component
 *
 * Non-multipart data goes straight to do_something_with_it().  For
 * multipart data, each stretch between two boundary lines is passed to
 * handle_part().  Parsing stops at the closing boundary ("--boundary--")
 * or at the end of the data.  Nothing is delivered when the boundary
 * parameter is missing, empty, or too long for the local buffer.
 */
void mime_parser(char *content,
                int ContentLength,
                char *ContentType,
                void (*CallBack)
                        (char *cbname,
                        char *cbfilename,
                        char *cbencoding,
                        void *cbcontent,
                        size_t cblength)
                ) {
        char boundary[256];
        char endary[260];       /* boundary plus "--" plus terminator */
        size_t boundary_len;
        size_t endary_len;
        const char *param = NULL;
        const char *scan;
        int quoted;
        char *ptr;
        char *beginning;
        int bytes_processed = 0;
        int part_length;

        /* If it's not multipart, don't process it as multipart */
        if (strncasecmp(ContentType, "multipart", 9)) {
                do_something_with_it(content, ContentLength,
                                ContentType, "", CallBack);
                return;
        }

        /* Figure out what the boundary is */
        for (scan = ContentType; *scan != 0; ++scan) {
                if (!strncasecmp(scan, "boundary=", 9)) {
                        param = scan + 9;
                        break;
                }
        }

        /* We can't process multipart messages without a boundary. */
        if (param == NULL) return;

        boundary[0] = '-';
        boundary[1] = '-';
        boundary_len = 2;

        /*
         * A quoted value runs to the closing quote; a bare value runs to
         * the next ';'.  Either form also stops at a line break.
         */
        quoted = (*param == '"');
        if (quoted) ++param;
        while ((*param != 0) && (*param != '\r') && (*param != '\n')
              && (*param != (quoted ? '"' : ';'))) {
                if (boundary_len >= (sizeof boundary) - 1) return;
                boundary[boundary_len++] = *param++;
        }
        if (!quoted) {
                /* Drop blanks left between a bare value and a ';' */
                while ((boundary_len > 2)
                      && ((boundary[boundary_len - 1] == ' ')
                         || (boundary[boundary_len - 1] == '\t'))) {
                        --boundary_len;
                }
        }
        if (boundary_len == 2) return;
        boundary[boundary_len] = 0;

        memcpy(endary, boundary, boundary_len);
        endary[boundary_len] = '-';
        endary[boundary_len + 1] = '-';
        endary_len = boundary_len + 2;
        endary[endary_len] = 0;

        ptr = content;

        while (bytes_processed < ContentLength) {

                /* Seek to the beginning of the next boundary */
                if (!mime_at_marker(ptr, ContentLength - bytes_processed,
                                boundary, boundary_len)) {
                        ++ptr;
                        ++bytes_processed;
                }

                /* See if we're at the end */
                if (mime_at_marker(ptr, ContentLength - bytes_processed,
                                endary, endary_len)) {
                        return;
                }

                if (mime_at_marker(ptr, ContentLength - bytes_processed,
                                boundary, boundary_len)) {

                        /* Seek to the end of the boundary string */
                        while ((bytes_processed < ContentLength)
                              && (*ptr != '\n')) {
                                ++ptr;
                                ++bytes_processed;
                        }

                        /* The part runs from here to the next boundary */
                        beginning = ptr;
                        part_length = 0;
                        while ((bytes_processed < ContentLength)
                              && !mime_at_marker(ptr,
                                        ContentLength - bytes_processed,
                                        boundary, boundary_len)) {
                                ++ptr;
                                ++bytes_processed;
                                ++part_length;
                        }
                        handle_part(beginning, part_length, "", CallBack);

                        /*
                         * ptr now rests on the next boundary (or on the
                         * end of the data), which is exactly where the
                         * next pass of the loop needs to look.
                         */
                }
        }
}