b205cb175895b2620c621d345dfa3b12df4e3e34
[citadel.git] / webcit / mime_parser.c
1 /*
2  * mime_parser.c
3  *
4  * This is a really bad attempt at writing a parser to handle multipart
5  * messages -- in the case of WebCit, a form containing uploaded files.
6  */
7
8 #include <stdlib.h>
9 #include <unistd.h>
10 #include <stdio.h>
11 #include <signal.h>
12 #include <sys/types.h>
13 #include <ctype.h>
14 #include <string.h>
15 #include "webcit.h"
16 #include "child.h"
17
18
19
/*
 * Extract the value of a "key=value" parameter from a header string.
 *
 * target  receives the NUL-terminated value, or "" when key is not found.
 *         The caller must supply a buffer of at least strlen(source) bytes;
 *         no size is passed in, so this function cannot bound the copy.
 * source  header text to search (for example a Content-disposition value).
 *         It is only read, never modified.
 * key     parameter name.  It is matched case-insensitively and must be
 *         immediately followed by '='.
 *
 * A single leading double quote is skipped, and the value ends at the next
 * double quote or at the end of the string.  Unquoted values are NOT cut
 * at ';' -- everything up to the end of source is returned.
 */
void extract_key(char *target, char *source, char *key) {
        size_t key_len = strlen(key);
        size_t src_len = strlen(source);
        size_t pos;
        size_t value_len;
        const char *value;

        /* The '=' must lie inside the string, hence pos + key_len < src_len */
        for (pos = 0; pos + key_len < src_len; ++pos) {
                if (strncasecmp(&source[pos], key, key_len) != 0) {
                        continue;
                }
                if (source[pos + key_len] != '=') {
                        continue;
                }

                value = &source[pos + key_len + 1];
                if (*value == '"') {
                        ++value;
                }
                value_len = strcspn(value, "\"");

                /* memmove, not strcpy: stays defined if target aliases source */
                memmove(target, value, value_len);
                target[value_len] = '\0';
                return;
        }

        target[0] = '\0';
}
36
37
38
/*
 * Final stage of the component handler.
 * The caller must pass the body of a component only -- no headers.
 *
 * content              start of the component body
 * length               number of bytes in the body
 * content_type         Content-type value for this component
 * content_disposition  Content-disposition value ("" if there is none)
 *
 * A component whose type begins with "multipart" (case-insensitive) is
 * handed back to mime_parser().  Anything else that carries a form field
 * name currently falls into an empty placeholder: the intent is to add a
 * callback (and content-transfer-encoding handling) here later, but for
 * now this is only a hardcoded WebCit tie-in point.
 */
void do_something_with_it(char *content, int length, char *content_type,
                        char *content_disposition) {
        char field_name[256];
        char upload_name[256];
        int is_multipart;

        /* " name" has a leading space so it cannot match inside "filename" */
        extract_key(field_name, content_disposition, " name");
        extract_key(upload_name, content_disposition, "filename");

        is_multipart = (strncasecmp(content_type, "multipart", 9) == 0);
        if (is_multipart) {
                /* Nested multipart: recurse into the parser and stop here */
                mime_parser(content, length, content_type);
                return;
        }

        if (field_name[0] != '\0') {
                /* Named form field: nothing is done with it yet */
        }
}
67
68
/*
 * Take one part of a multipart message, split its headers from its body,
 * and pass the body on to do_something_with_it().
 * The caller must pass HEADERS + CONTENT.
 *
 * content                start of the part (leading blank lines allowed)
 * part_length            number of bytes available at content
 * supplied_content_type  content type to use if the part has no
 *                        Content-type header of its own
 *
 * Header lines are read until the first blank line.  Only Content-type
 * and Content-disposition are recognised; values are truncated to 255
 * bytes.  If the part runs out before the blank line that ends the
 * headers, the part is dropped.  The newline that terminates the body
 * (CRLF if any header line ended in CRLF, otherwise LF) is not counted
 * in the length passed on.
 */
void handle_part(char *content, int part_length, char *supplied_content_type) {
        char content_type[256];
        char content_disposition[256];
        char buf[512];
        char *start = content;
        size_t len;
        int crlf = 0;   /* set to 1 for crlf-style newlines */
        int actual_length;
        char ch;

        snprintf(content_type, sizeof content_type, "%s",
                 supplied_content_type);
        /* Start empty so a part without this header never passes garbage on */
        content_disposition[0] = '\0';

        /* Strip off any leading blank lines, without leaving the part. */
        while ((part_length > 0) && ((*start == '\r') || (*start == '\n'))) {
                ++start;
                --part_length;
        }
        if (part_length <= 0) {
                return;
        }

        /* At this point all we have left is the headers and the content. */
        do {
                /*
                 * Read one line.  Bytes beyond the buffer size are consumed
                 * but not stored; the loop ends on the newline actually read
                 * from the input, so an over-long line cannot swallow the
                 * lines that follow it.  NUL bytes are skipped.
                 */
                len = 0;
                do {
                        ch = *start;
                        ++start;
                        --part_length;
                        if ((ch != '\0') && (len < (sizeof buf) - 1)) {
                                buf[len] = ch;
                                ++len;
                        }
                } while ((ch != '\n') && (part_length > 0));

                /* Ran out of data before the headers ended: nothing to do */
                if (part_length <= 0) {
                        return;
                }

                /* Drop the line terminator, noting whether it was CRLF */
                if ((len > 0) && (buf[len - 1] == '\n')) {
                        --len;
                        if ((len > 0) && (buf[len - 1] == '\r')) {
                                --len;
                                crlf = 1;
                        }
                }
                buf[len] = '\0';

                if (!strncasecmp(buf, "Content-type: ", 14)) {
                        snprintf(content_type, sizeof content_type,
                                 "%s", &buf[14]);
                }
                if (!strncasecmp(buf, "Content-disposition: ", 21)) {
                        snprintf(content_disposition,
                                 sizeof content_disposition,
                                 "%s", &buf[21]);
                }
        } while (len > 0);

        /* The body's terminating newline is not part of the content */
        actual_length = part_length - (crlf ? 2 : 1);
        if (actual_length < 0) {
                actual_length = 0;
        }

        /* Now that we've got this component isolated, what to do with it? */
        do_something_with_it(start, actual_length,
                                content_type, content_disposition);
}
123
124         
/*
 * Return nonzero if the marker_len bytes of marker appear (compared
 * case-insensitively) at offset pos of content, without looking at any
 * byte at or beyond offset length.
 */
static int mime_at_marker(const char *content, int pos, int length,
                        const char *marker, size_t marker_len) {
        if (pos > length) {
                return 0;
        }
        if ((size_t)(length - pos) < marker_len) {
                return 0;
        }
        return (strncasecmp(content + pos, marker, marker_len) == 0);
}

/*
 * Break out the components of a multipart message.
 * The caller must pass CONTENT ONLY, no headers.
 *
 * content        message body
 * ContentLength  number of bytes in content
 * ContentType    Content-type value; for multipart data it must carry a
 *                "boundary=" parameter
 *
 * Non-multipart content goes straight to do_something_with_it().  For
 * multipart content, each stretch between two boundary lines is passed to
 * handle_part(), and parsing stops at the closing "--boundary--" marker.
 * The boundary value may be quoted; an unquoted value ends at ';', CR or
 * LF.  A missing, empty or over-long boundary makes the message
 * unparseable and the function returns without doing anything.
 */
void mime_parser(char *content, int ContentLength, char *ContentType) {
        char boundary[256];
        char endary[sizeof boundary + 2];
        const char *scan;
        const char *value = NULL;
        size_t value_len;
        size_t boundary_len;
        size_t endary_len;
        int pos = 0;
        int part_start;

        /* If it's not multipart, don't process it as multipart */
        if (strncasecmp(ContentType, "multipart", 9)) {
                do_something_with_it(content, ContentLength, ContentType, "");
                return;
        }

        /* Figure out what the boundary is */
        for (scan = ContentType; *scan != '\0'; ++scan) {
                if (!strncasecmp(scan, "boundary=", 9)) {
                        value = scan + 9;
                        break;
                }
        }

        /* We can't process multipart messages without a boundary. */
        if (value == NULL) {
                return;
        }

        if (*value == '"') {
                ++value;
                value_len = strcspn(value, "\"\r\n");
        } else {
                value_len = strcspn(value, ";\r\n");
                while ((value_len > 0) && ((value[value_len - 1] == ' ')
                   || (value[value_len - 1] == '\t'))) {
                        --value_len;
                }
        }

        /* Room is needed for the leading "--" and the terminating NUL */
        if ((value_len == 0) || (value_len > (sizeof boundary) - 3)) {
                return;
        }

        boundary[0] = '-';
        boundary[1] = '-';
        memcpy(&boundary[2], value, value_len);
        boundary[value_len + 2] = '\0';
        boundary_len = value_len + 2;

        snprintf(endary, sizeof endary, "%s--", boundary);
        endary_len = boundary_len + 2;

        while (pos < ContentLength) {

                /* Seek to the beginning of the next boundary */
                if (!mime_at_marker(content, pos, ContentLength,
                    boundary, boundary_len)) {
                        ++pos;
                        continue;
                }

                /* See if we're at the end */
                if (mime_at_marker(content, pos, ContentLength,
                    endary, endary_len)) {
                        return;
                }

                /* Seek to the end of the boundary line */
                while ((pos < ContentLength) && (content[pos] != '\n')) {
                        ++pos;
                }

                /*
                 * The part runs from that newline up to the next boundary
                 * (or to the end of the data if no boundary follows).
                 */
                part_start = pos;
                while ((pos < ContentLength)
                   && (!mime_at_marker(content, pos, ContentLength,
                       boundary, boundary_len))) {
                        ++pos;
                }

                /* An empty part holds no headers, so there is nothing to do */
                if (pos > part_start) {
                        handle_part(content + part_start,
                                    pos - part_start, "");
                }
        }
}