webcit/mime_parser.c

   1 /*
   2  * mime_parser.c
   3  *
   4  * This is a really bad attempt at writing a parser to handle multipart
   5  * messages -- in the case of WebCit, a form containing uploaded files.
   6  */
   7
   8 #include <stdlib.h>
   9 #include <unistd.h>
  10 #include <stdio.h>
  11 #include <signal.h>
  12 #include <sys/types.h>
  13 #include <ctype.h>
  14 #include <string.h>
  15 #include "webcit.h"
  16 #include "child.h"
  17
  18
  19
  20 void extract_key(char *target, char *source, char *key) {
  21         int a, b;
  22
  23         strcpy(target, source);
  24         for (a=0; a<strlen(target); ++a) {
  25                 if ((!strncasecmp(&target[a], key, strlen(key)))
  26                    && (target[a+strlen(key)]=='=')) {
  27                         strcpy(target, &target[a+strlen(key)+1]);
  28                         if (target[0]==34) strcpy(target, &target[1]);
  29                         for (b=0; b<strlen(target); ++b)
  30                                 if (target[b]==34) target[b]=0;
  31                         return;
  32                         }
  33                 }
  34         strcpy(target, "");
  35         }
  36
  37
  38
  39 /*
  40  * The very back end for the component handler
  41  * (This function expects to be fed CONTENT ONLY, no headers)
  42  */
  43 void do_something_with_it(char *content, int length, char *content_type,
  44                         char *content_disposition) {
  45         char name[256];
  46         char filename[256];
  47
  48         extract_key(name, content_disposition, " name");
  49         extract_key(filename, content_disposition, "filename");
  50
  51         /* Nested multipart gets recursively fed back into the parser */
  52         if (!strncasecmp(content_type, "multipart", 9)) {
  53                 mime_parser(content, length, content_type);
  54                 }
  55
  56         /**** OTHERWISE, HERE'S WHERE WE HANDLE THE STUFF!! ****
  57          * Later we'll want to do this with a callback.  We'll also want to
  58          * handle content-transfer-encoding before passing control to callback
  59          * functions.  For now, though ... it's just a hardcoded WebCit tie-in.
  60          */
  61
  62         else if (strlen(name)>0) {
  63
  64                 }
  65
  66         }
  67
  68
  69 /*
  70  * Take a part, figure out its length, and do something with it
  71  * (This function expects to be fed HEADERS+CONTENT)
  72  */
  73 void handle_part(char *content, int part_length, char *supplied_content_type) {
  74         char content_type[256];
  75         char content_disposition[256];
  76         char *start;
  77         char buf[512];
  78         int crlf = 0;   /* set to 1 for crlf-style newlines */
  79         int actual_length;
  80
  81         strcpy(content_type, supplied_content_type);
  82
  83         /* Strip off any leading blank lines. */
  84         start = content;
  85         while ((!strncmp(start, "\r", 1)) || (!strncmp(start, "\n", 1))) {
  86                 ++start;
  87                 --part_length;
  88                 }
  89
  90         /* At this point all we have left is the headers and the content. */
  91         do {
  92                 strcpy(buf, "");
  93                 do {
  94                         buf[strlen(buf)+1] = 0;
  95                         if (strlen(buf)<((sizeof buf)-1)) {
  96                                 strncpy(&buf[strlen(buf)], start, 1);
  97                                 }
  98                         ++start;
  99                         --part_length;
 100                         } while((buf[strlen(buf)-1] != 10) && (part_length>0));
 101                 if (part_length <= 0) return;
 102                 buf[strlen(buf)-1] = 0;
 103                 if (buf[strlen(buf)-1]==13) {
 104                         buf[strlen(buf)-1] = 0;
 105                         crlf = 1;
 106                         }
 107                 if (!strncasecmp(buf, "Content-type: ", 14)) {
 108                         strcpy(content_type, &buf[14]);
 109                         }
 110                 if (!strncasecmp(buf, "Content-disposition: ", 21)) {
 111                         strcpy(content_disposition, &buf[21]);
 112                         }
 113                 } while (strlen(buf)>0);
 114
 115         if (crlf) actual_length = part_length - 2;
 116         else actual_length = part_length - 1;
 117
 118         /* Now that we've got this component isolated, what to do with it? */
 119         do_something_with_it(start, actual_length,
 120                                 content_type, content_disposition);
 121
 122         }
 123
 124
 125 /*
 126  * Break out the components of a multipart message
 127  * (This function expects to be fed CONTENT ONLY, no headers)
 128  */
 129 void mime_parser(char *content, int ContentLength, char *ContentType) {
 130         char boundary[256];
 131         char endary[256];
 132         int have_boundary = 0;
 133         int a;
 134         char *ptr;
 135         char *beginning;
 136         int bytes_processed = 0;
 137         int part_length;
 138
 139         /* If it's not multipart, don't process it as multipart */
 140         if (strncasecmp(ContentType, "multipart", 9)) {
 141                 do_something_with_it(content, ContentLength, ContentType, "");
 142                 return;
 143                 }
 144
 145         /* Figure out what the boundary is */
 146         strcpy(boundary, ContentType);
 147         for (a=0; a<strlen(boundary); ++a) {
 148                 if (!strncasecmp(&boundary[a], "boundary=", 9)) {
 149                         boundary[0]='-';
 150                         boundary[1]='-';
 151                         strcpy(&boundary[2], &boundary[a+9]);
 152                         have_boundary = 1;
 153                         a = 0;
 154                         }
 155                 if ((boundary[a]==13) || (boundary[a]==10)) {
 156                         boundary[a] = 0;
 157                         }
 158                 }
 159
 160         /* We can't process multipart messages without a boundary. */
 161         if (have_boundary == 0) return;
 162         strcpy(endary, boundary);
 163         strcat(endary, "--");
 164
 165         ptr = content;
 166
 167         /* Seek to the beginning of the next boundary */
 168         while (bytes_processed < ContentLength) {
 169               /* && (strncasecmp(ptr, boundary, strlen(boundary))) ) { */
 170
 171                 if (strncasecmp(ptr, boundary, strlen(boundary))) {
 172                         ++ptr;
 173                         ++bytes_processed;
 174                         }
 175
 176                 /* See if we're at the end */
 177                 if (!strncasecmp(ptr, endary, strlen(endary))) {
 178                         return;
 179                         }
 180
 181                 /* Seek to the end of the boundary string */
 182                 if (!strncasecmp(ptr, boundary, strlen(boundary))) {
 183                         while ( (bytes_processed < ContentLength)
 184                               && (strncasecmp(ptr, "\n", 1)) ) {
 185                                 ++ptr;
 186                                 ++bytes_processed;
 187                                 }
 188                         beginning = ptr;
 189                         part_length = 0;
 190                         while ( (bytes_processed < ContentLength)
 191                           && (strncasecmp(ptr, boundary, strlen(boundary))) ) {
 192                                 ++ptr;
 193                                 ++bytes_processed;
 194                                 ++part_length;
 195                                 }
 196                         handle_part(beginning, part_length, "");
 197                         /* Back off so we can see the next boundary */
 198                         --ptr;
 199                         --bytes_processed;
 200                         }
 201                 }
 202         }