]> code.citadel.org Git - citadel.git/blob - webcit/mime_parser.c
ed23f35181fc323f47fb5502b625a49e60a68a6f
[citadel.git] / webcit / mime_parser.c
1 /*
2  * mime_parser.c
3  *
4  * This is a really bad attempt at writing a parser to handle multipart
5  * messages -- in the case of WebCit, a form containing uploaded files.
6  */
7
8 #include <stdlib.h>
9 #include <unistd.h>
10 #include <stdio.h>
11 #include <signal.h>
12 #include <sys/types.h>
13 #include <ctype.h>
14 #include <string.h>
15 #include "mime_parser.h"
16 #include "webcit.h"
17 #include "child.h"
18
19
20
/*
 * Pull the value of a "key=value" parameter out of a header string.
 *
 * target  receives the value as a NUL-terminated string, or "" when no
 *         occurrence of key immediately followed by '=' exists in source.
 *         It must have room for the value plus the terminator; a buffer
 *         as large as source is always sufficient.
 * source  the header text to search (it is only read).
 * key     the parameter name, compared without regard to case.
 *
 * One leading double quote is dropped and the value ends at the next
 * double quote; when there is no quote at all, the value runs to the
 * end of source.
 */
void extract_key(char *target, char *source, char *key) {
        size_t keylen = strlen(key);
        size_t vallen;
        char *scan;
        char *value;
        char *endquote;

        for (scan = source; *scan != 0; ++scan) {
                if ((!strncasecmp(scan, key, keylen))
                   && (scan[keylen] == '=')) {
                        value = &scan[keylen + 1];
                        if (*value == '"') ++value;
                        endquote = strchr(value, '"');
                        vallen = (endquote != NULL)
                                ? (size_t)(endquote - value)
                                : strlen(value);
                        /* memmove, because target may overlap source */
                        memmove(target, value, vallen);
                        target[vallen] = 0;
                        return;
                        }
                }
        target[0] = 0;
        }
37
38
39
/*
 * The very back end for the component handler
 * (This function expects to be fed CONTENT ONLY, no headers)
 *
 * content              start of the component's body
 * length               number of body bytes (negative values are treated
 *                      as zero)
 * content_type         the component's Content-type value
 * content_disposition  the component's Content-disposition value, from
 *                      which the "name" and "filename" parameters are
 *                      taken
 * CallBack             invoked once for a non-multipart component with
 *                      the name, the filename, an empty encoding string,
 *                      the body, the content type and the length
 *
 * A component whose type begins with "multipart" is handed back to
 * mime_parser() so that its sub-parts are delivered one by one; the
 * enclosing container itself is not passed to CallBack.
 */
void do_something_with_it(char *content,
                int length,
                char *content_type,
                char *content_disposition,
                void (*CallBack)
                        (char *cbname,
                        char *cbfilename,
                        char *cbencoding,
                        void *cbcontent,
                        char *cbtype,
                        size_t cblength)
                ) {
        char disposition[256];
        char name[256];
        char filename[256];

        /* Nested multipart gets recursively fed back into the parser */
        if (!strncasecmp(content_type, "multipart", 9)) {
                mime_parser(content, length, content_type, CallBack);
                return;
                }

        /*
         * extract_key() has no size argument, so cap the text it works
         * from at the size of the buffers it is going to fill.
         */
        snprintf(disposition, sizeof disposition, "%s", content_disposition);
        extract_key(name, disposition, " name");
        extract_key(filename, disposition, "filename");

        /* A negative int would turn into an enormous size_t */
        if (length < 0) length = 0;

        CallBack(name, filename, "", content, content_type, (size_t)length);
        }
74
75
/*
 * Take a part, figure out its length, and do something with it
 * (This function expects to be fed HEADERS+CONTENT)
 *
 * content                first byte of the part
 * part_length            number of bytes in the part
 * supplied_content_type  content type to use when the part carries no
 *                        Content-type header of its own
 * CallBack               passed through to do_something_with_it()
 *
 * Header lines are read up to the first empty line.  Header values
 * longer than the local buffers are truncated.  If the part runs out
 * before any body bytes follow the headers, nothing is delivered.
 * The newline that closes the body (one byte, or two when any header
 * line ended in CR LF) is not counted in the length handed on.
 */
void handle_part(char *content,
                int part_length,
                char *supplied_content_type,
                void (*CallBack)
                        (char *cbname,
                        char *cbfilename,
                        char *cbencoding,
                        void *cbcontent,
                        char *cbtype,
                        size_t cblength)
                ) {
        char content_type[256];
        char content_disposition[256];
        char buf[512];
        char *start = content;
        size_t used;
        int saw_newline;
        int crlf = 0;   /* set to 1 for crlf-style newlines */
        int actual_length;
        char ch;

        snprintf(content_type, sizeof content_type, "%s",
                supplied_content_type);
        /* Parts without a Content-disposition header get an empty one */
        content_disposition[0] = 0;

        /* Strip off any leading blank lines. */
        while ((part_length > 0)
              && ((*start == '\r') || (*start == '\n'))) {
                ++start;
                --part_length;
                }

        /* At this point all we have left is the headers and the content. */
        do {
                /*
                 * Collect one line.  Bytes that do not fit in buf are
                 * dropped, but we still read on to the newline so the
                 * next header starts in the right place.
                 */
                used = 0;
                saw_newline = 0;
                while ((part_length > 0) && (!saw_newline)) {
                        ch = *start++;
                        --part_length;
                        if (ch == '\n') {
                                saw_newline = 1;
                                }
                        else if (used < ((sizeof buf) - 1)) {
                                buf[used++] = ch;
                                }
                        }
                if (part_length <= 0) return;

                if ((used > 0) && (buf[used - 1] == '\r')) {
                        --used;
                        crlf = 1;
                        }
                buf[used] = 0;

                if (!strncasecmp(buf, "Content-type: ", 14)) {
                        snprintf(content_type, sizeof content_type,
                                "%s", &buf[14]);
                        }
                if (!strncasecmp(buf, "Content-disposition: ", 21)) {
                        snprintf(content_disposition,
                                sizeof content_disposition,
                                "%s", &buf[21]);
                        }
                } while (used > 0);

        /* Leave out the newline that sits between body and boundary */
        actual_length = part_length - (crlf ? 2 : 1);
        if (actual_length < 0) actual_length = 0;

        /* Now that we've got this component isolated, what to do with it? */
        do_something_with_it(start, actual_length,
                        content_type, content_disposition, CallBack);

        }
140
141         
/*
 * Report whether the boundary string (blen bytes long) starts at ptr.
 * remaining is the number of valid bytes at ptr; the comparison is only
 * made when the whole boundary fits inside them.
 */
static int mime_at_boundary(const char *ptr, int remaining,
                const char *boundary, size_t blen) {
        if ((remaining < 0) || ((size_t)remaining < blen)) return 0;
        return (strncasecmp(ptr, boundary, blen) == 0);
        }


/*
 * Break out the components of a multipart message
 * (This function expects to be fed CONTENT ONLY, no headers)
 *
 * content        the message body
 * ContentLength  number of bytes in content
 * ContentType    the Content-type value for the body
 * CallBack       passed through to the component handlers
 *
 * A body whose type does not begin with "multipart" is handed to
 * do_something_with_it() as a single component.  Otherwise the
 * "boundary=" parameter is taken from ContentType (with or without
 * double quotes; an unquoted value ends at ';', CR or LF) and every
 * section that follows a boundary line is given to handle_part().
 * Parsing stops at the closing boundary (the boundary followed by "--")
 * or at the end of the content.  Nothing is done when the boundary is
 * missing, empty, or too long for the local buffer.
 */
void mime_parser(char *content,
                int ContentLength,
                char *ContentType,
                void (*CallBack)
                        (char *cbname,
                        char *cbfilename,
                        char *cbencoding,
                        void *cbcontent,
                        char *cbtype,
                        size_t cblength)
                ) {
        char boundary[256];
        size_t blen;
        char *param = NULL;
        char stop = ';';
        char *ptr;
        char *beginning;
        int bytes_processed = 0;
        int remaining;
        int part_length;

        /* If it's not multipart, don't process it as multipart */
        if (strncasecmp(ContentType, "multipart", 9)) {
                do_something_with_it(content, ContentLength,
                                ContentType, "", CallBack);
                return;
                }

        /* Figure out what the boundary is */
        for (ptr = ContentType; *ptr != 0; ++ptr) {
                if (!strncasecmp(ptr, "boundary=", 9)) {
                        param = ptr + 9;
                        break;
                        }
                }

        /* We can't process multipart messages without a boundary. */
        if (param == NULL) return;

        if (*param == '"') {
                stop = '"';
                ++param;
                }
        boundary[0] = '-';
        boundary[1] = '-';
        blen = 2;
        while ((*param != 0) && (*param != stop)
              && (*param != '\r') && (*param != '\n')) {
                /* A cut-short boundary would match the wrong places */
                if (blen >= ((sizeof boundary) - 1)) return;
                boundary[blen++] = *param++;
                }
        boundary[blen] = 0;
        if (blen == 2) return;

        ptr = content;

        while (bytes_processed < ContentLength) {
                remaining = ContentLength - bytes_processed;

                /* Seek to the beginning of the next boundary */
                if (!mime_at_boundary(ptr, remaining, boundary, blen)) {
                        ++ptr;
                        ++bytes_processed;
                        continue;
                        }

                /* See if we're at the end */
                if (((size_t)remaining >= blen + 2)
                   && (ptr[blen] == '-') && (ptr[blen + 1] == '-')) {
                        return;
                        }

                /* Seek to the end of the boundary string */
                while ((bytes_processed < ContentLength)
                      && (*ptr != '\n')) {
                        ++ptr;
                        ++bytes_processed;
                        }

                /* Everything up to the next boundary is one part */
                beginning = ptr;
                part_length = 0;
                while ((bytes_processed < ContentLength)
                      && (!mime_at_boundary(ptr,
                                ContentLength - bytes_processed,
                                boundary, blen))) {
                        ++ptr;
                        ++bytes_processed;
                        ++part_length;
                        }
                handle_part(beginning, part_length, "", CallBack);
                }
        }