* Rewrote the HTTP engine and application coupling to run in a worker thread
[citadel.git] / webcit / mime_parser.c
1 /*
2  * mime_parser.c
3  *
4  * This is a really bad attempt at writing a parser to handle multipart
5  * messages -- in the case of WebCit, a form containing uploaded files.
6  */
7
8
9
10 #include <ctype.h>
11 #include <stdlib.h>
12 #include <unistd.h>
13 #include <stdio.h>
14 #include <fcntl.h>
15 #include <signal.h>
16 #include <sys/types.h>
17 #include <sys/wait.h>
18 #include <sys/socket.h>
19 #include <sys/time.h>
20 #include <limits.h>
21 #include <netinet/in.h>
22 #include <netdb.h>
23 #include <string.h>
24 #include <pwd.h>
25 #include <errno.h>
26 #include <stdarg.h>
27 #include <pthread.h>
28 #include <signal.h>
29 #include "webcit.h"
30
31
32
33
34
/*
 * Extract the value of a "key=value" parameter from a header line.
 *
 * target  receives the value as a NUL-terminated string, or "" when the
 *         key is not present.  It must be large enough to hold a complete
 *         copy of source, because source is first copied into it.
 * source  the header text to search (for example the value of a
 *         Content-disposition header).
 * key     the parameter name, matched without regard to case and required
 *         to be followed immediately by '='.
 *
 * A value that begins with a double quote runs up to the next double
 * quote.  An unquoted value runs up to the next ';' or double quote, so
 * that following parameters are not swallowed into the result.
 */
void extract_key(char *target, char *source, char *key)
{
        size_t keylen = strlen(key);
        size_t pos;
        char *value;

        /* memmove() throughout: the copies below overlap within target,
         * which strcpy() does not permit. */
        memmove(target, source, strlen(source) + 1);

        for (pos = 0; target[pos] != '\0'; ++pos) {
                if ((strncasecmp(&target[pos], key, keylen) != 0)
                    || (target[pos + keylen] != '=')) {
                        continue;
                }

                value = &target[pos + keylen + 1];
                if (*value == '"') {
                        /* Quoted value: keep everything up to the closing quote */
                        ++value;
                        memmove(target, value, strlen(value) + 1);
                        target[strcspn(target, "\"")] = '\0';
                } else {
                        /* Bare token: ends at the next parameter separator */
                        memmove(target, value, strlen(value) + 1);
                        target[strcspn(target, ";\"")] = '\0';
                }
                return;
        }

        /* Key not found */
        target[0] = '\0';
}
54
55
56
/*
 * The very back end for the component handler
 * (This function expects to be fed CONTENT ONLY, no headers)
 *
 * content              start of the component's body
 * length               number of bytes in the body
 * content_type         value of the component's Content-type header
 * content_disposition  value of the component's Content-disposition
 *                      header, or "" if it has none
 * CallBack             invoked once for each non-multipart component with
 *                      the "name" and "filename" parameters taken from
 *                      content_disposition, an empty encoding string, the
 *                      body, its content type and its length
 *
 * A component whose type begins with "multipart" is fed back into
 * mime_parser() so that its sub-parts are delivered individually; the
 * enclosing multipart body itself is not passed to the callback.
 */
void do_something_with_it(char *content,
                          int length,
                          char *content_type,
                          char *content_disposition,
                          void (*CallBack)
                           (char *cbname,
                            char *cbfilename,
                            char *cbencoding,
                            void *cbcontent,
                            char *cbtype,
                            size_t cblength)
)
{
        char disposition[256];
        char name[256];
        char filename[256];

        /* Nested multipart gets recursively fed back into the parser */
        if (!strncasecmp(content_type, "multipart", 9)) {
                mime_parser(content, length, content_type, CallBack);
                return;
        }

        /*
         * extract_key() copies its whole source string into the target
         * buffer, so cap the header at the size of those buffers first.
         */
        snprintf(disposition, sizeof disposition, "%s", content_disposition);
        extract_key(name, disposition, " name");
        extract_key(filename, disposition, "filename");

        /* A negative length would turn into a huge size_t in the callback */
        if (length < 0) {
                length = 0;
        }

        CallBack(name, filename, "", content, content_type, (size_t) length);
}
91
92
/*
 * Take a part, figure out its length, and do something with it
 * (This function expects to be fed HEADERS+CONTENT)
 *
 * content                start of the part: optional blank lines, header
 *                        lines, an empty line, then the body
 * part_length            number of bytes available at content
 * supplied_content_type  content type to assume when the part carries no
 *                        Content-type header of its own
 * CallBack               passed through to do_something_with_it()
 *
 * The headers are read line by line until an empty line is found; only
 * Content-type and Content-disposition are remembered.  What follows the
 * empty line, minus one trailing line terminator, is handed to
 * do_something_with_it().  If the part runs out before the headers end,
 * or has no bytes left after them, the function returns without
 * delivering anything.
 */
void handle_part(char *content,
                 int part_length,
                 char *supplied_content_type,
                 void (*CallBack)
                  (char *cbname,
                   char *cbfilename,
                   char *cbencoding,
                   void *cbcontent,
                   char *cbtype,
                   size_t cblength)
)
{
        char content_type[256];
        char content_disposition[256] = "";     /* part may have no such header */
        char buf[512];
        char *start = content;
        size_t len;
        char c;
        int crlf = 0;           /* set to 1 for crlf-style newlines */
        int actual_length;

        snprintf(content_type, sizeof content_type, "%s",
                 supplied_content_type);

        /* Strip off any leading blank lines. */
        while ((part_length > 0) && ((*start == '\r') || (*start == '\n'))) {
                ++start;
                --part_length;
        }
        if (part_length <= 0)
                return;

        /* At this point all we have left is the headers and the content. */
        do {
                /*
                 * Collect one line.  Characters beyond the capacity of buf
                 * are discarded, but the line is still consumed up to its
                 * newline so the following headers stay in sync.
                 */
                len = 0;
                do {
                        c = *start++;
                        --part_length;
                        if ((c != '\0') && (len < (sizeof buf) - 1))
                                buf[len++] = c;
                } while ((c != '\n') && (part_length > 0));

                /* Ran out of data before reaching the body */
                if (part_length <= 0)
                        return;

                /* Remove the line terminator, noting which style is in use */
                if ((len > 0) && (buf[len - 1] == '\n')) {
                        --len;
                        if ((len > 0) && (buf[len - 1] == '\r')) {
                                --len;
                                crlf = 1;
                        }
                }
                buf[len] = '\0';

                if (!strncasecmp(buf, "Content-type: ", 14)) {
                        snprintf(content_type, sizeof content_type,
                                 "%s", &buf[14]);
                }
                if (!strncasecmp(buf, "Content-disposition: ", 21)) {
                        snprintf(content_disposition,
                                 sizeof content_disposition,
                                 "%s", &buf[21]);
                }
        } while (len > 0);      /* an empty line ends the headers */

        /* Leave out the line terminator that precedes the next boundary */
        actual_length = part_length - (crlf ? 2 : 1);
        if (actual_length < 0)
                actual_length = 0;

        /* Now that we've got this component isolated, what to do with it? */
        do_something_with_it(start, actual_length,
                             content_type, content_disposition, CallBack);
}
161
162
/*
 * Report whether a marker string is present at a position in the content.
 * Returns nonzero when the mlen bytes of marker match, without regard to
 * case, the bytes at p.  Only `remaining` bytes at p are valid; a marker
 * that would not fit in them never matches, so nothing past the end of
 * the content is examined.
 */
static int mime_at_marker(const char *p, int remaining,
                          const char *marker, size_t mlen)
{
        if ((remaining < 0) || ((size_t) remaining < mlen))
                return 0;
        return (strncasecmp(p, marker, mlen) == 0);
}


/*
 * Break out the components of a multipart message
 * (This function expects to be fed CONTENT ONLY, no headers)
 *
 * content        the message body
 * ContentLength  number of bytes in content
 * ContentType    the Content-type header value; for multipart content it
 *                must carry a boundary= parameter (quoted or unquoted)
 * CallBack       receives each component, see do_something_with_it()
 *
 * Content that is not multipart is passed to do_something_with_it() as a
 * single component.  Multipart content without a usable boundary is
 * ignored.  Otherwise each stretch of data between two boundary lines is
 * given to handle_part(); parsing stops at the closing boundary (the
 * boundary followed by "--") or at the end of the content.
 */
void mime_parser(char *content,
                 int ContentLength,
                 char *ContentType,
                 void (*CallBack)
                  (char *cbname,
                   char *cbfilename,
                   char *cbencoding,
                   void *cbcontent,
                   char *cbtype,
                   size_t cblength)
)
{
        char boundary[256];
        char endary[sizeof boundary + 2];
        const char *param;
        size_t blen = 0;
        size_t elen;
        int pos = 0;
        int part_start;

        /* If it's not multipart, don't process it as multipart */
        if (strncasecmp(ContentType, "multipart", 9)) {
                do_something_with_it(content, ContentLength,
                                     ContentType, "", CallBack);
                return;
        }

        /* Figure out what the boundary is */
        for (param = ContentType; *param != '\0'; ++param) {
                if (!strncasecmp(param, "boundary=", 9))
                        break;
        }

        /* We can't process multipart messages without a boundary. */
        if (*param == '\0')
                return;
        param += 9;

        /* Delimiter lines consist of "--" followed by the boundary value */
        boundary[blen++] = '-';
        boundary[blen++] = '-';
        if (*param == '"') {
                /* Quoted value: runs to the closing quote */
                ++param;
                while ((*param != '\0') && (*param != '"')
                       && (*param != '\r') && (*param != '\n')
                       && (blen < (sizeof boundary) - 1)) {
                        boundary[blen++] = *param++;
                }
        } else {
                /* Bare value: runs to the next parameter or end of line */
                while ((*param != '\0') && (*param != ';')
                       && (*param != '\r') && (*param != '\n')
                       && (blen < (sizeof boundary) - 1)) {
                        boundary[blen++] = *param++;
                }
        }
        boundary[blen] = '\0';

        /* An empty boundary value would match every "--" in the content */
        if (blen <= 2)
                return;

        /* The closing delimiter is the boundary followed by "--" */
        elen = (size_t) snprintf(endary, sizeof endary, "%s--", boundary);

        while (pos < ContentLength) {

                /* Seek to the beginning of the next boundary */
                if (!mime_at_marker(content + pos, ContentLength - pos,
                                    boundary, blen)) {
                        ++pos;
                        continue;
                }

                /* See if we're at the end */
                if (mime_at_marker(content + pos, ContentLength - pos,
                                   endary, elen)) {
                        return;
                }

                /* Seek to the end of the boundary line */
                while ((pos < ContentLength) && (content[pos] != '\n'))
                        ++pos;

                /* The part extends up to the next boundary or end of data */
                part_start = pos;
                while ((pos < ContentLength)
                       && !mime_at_marker(content + pos, ContentLength - pos,
                                          boundary, blen)) {
                        ++pos;
                }

                /* Nothing follows a boundary line cut off by end of data */
                if (pos > part_start) {
                        handle_part(content + part_start, pos - part_start,
                                    "", CallBack);
                }
        }
}