dkim.c: added, but not yet integrated

author Art Cancro <ajc@citadel.org>

Wed, 8 May 2024 17:15:17 +0000 (10:15 -0700)

committer Art Cancro <ajc@citadel.org>

Wed, 8 May 2024 17:15:17 +0000 (10:15 -0700)
author Art Cancro <ajc@citadel.org>
Wed, 8 May 2024 17:15:17 +0000 (10:15 -0700)
committer Art Cancro <ajc@citadel.org>
Wed, 8 May 2024 17:15:17 +0000 (10:15 -0700)
diff --git a/citadel/server/modules/smtp/dkim.c b/citadel/server/modules/smtp/dkim.c

new file mode 100644 (file)

index 0000000..6a6b9ef
--- /dev/null
+++ b/citadel/server/modules/smtp/dkim.c
@@ -0,0 +1,531 @@
+// DKIM signature creation
+// https://www.rfc-editor.org/rfc/rfc6376.html#section-5
+//
+// Copyright (c) 2024 by Art Cancro
+// This program is open source software.  Use, duplication, or disclosure is subject to the GNU General Public License v3.
+
+// Make sure we don't accidentally use any deprecated API calls
+#define OPENSSL_NO_DEPRECATED_3_0
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <string.h>
+#include <time.h>
+#include <openssl/rand.h>
+#include <openssl/rsa.h>
+#include <openssl/engine.h>
+#include <openssl/sha.h>
+#include <openssl/hmac.h>
+#include <openssl/evp.h>
+#include <openssl/bio.h>
+#include <openssl/pem.h>
+#include <openssl/buffer.h>
+#include <openssl/err.h>
+#include <libcitadel.h>
+
+
+// Strip trailing spaces and tabs from a NUL-terminated string, in place.
+// Returns the same pointer that was passed in.
+char *dkim_rtrim(char *str) {
+       size_t len = strlen(str);
+
+       // Measure the string once and walk backwards, rather than calling strlen() again after every removed character.
+       while (len > 0 && (str[len-1] == ' ' || str[len-1] == '\t')) {
+               str[--len] = '\0';
+       }
+
+       return str;
+}
+
+
+// Strip trailing CR and LF characters from a NUL-terminated string, in place.
+// Returns the same pointer that was passed in.
+char *dkim_rtrim_lines(char *str) {
+       size_t len = strlen(str);
+
+       // Measure the string once and walk backwards, rather than calling strlen() again after every removed character.
+       while (len > 0 && (str[len-1] == '\r' || str[len-1] == '\n')) {
+               str[--len] = '\0';
+       }
+
+       return str;
+}
+
+
+// Convert binary to base64 using openssl library function.  We should replace this with the encoder from libcitadel.
+//
+// input   bytes to encode
+// length  number of bytes at `input`
+//
+// Returns a newly allocated, NUL-terminated string containing no line breaks.  The caller must free() it.
+// Returns NULL if `length` is negative or memory cannot be allocated.
+char *dkim_base64_encode(const unsigned char *input, int length) {
+       if (length < 0) {
+               return NULL;
+       }
+
+       // EVP_EncodeBlock() writes 4 characters for every 3 input bytes (rounded up) followed by a NUL terminator.
+       size_t encoded_size = 4 * (((size_t)length + 2) / 3) + 1;
+       char *buf = malloc(encoded_size);
+       if (buf == NULL) {
+               return NULL;
+       }
+
+       // Unlike the base64 BIO filter, EVP_EncodeBlock() never inserts line breaks, so there is nothing to strip
+       // afterwards, and a zero-length input simply produces an empty string.
+       int written = EVP_EncodeBlock((unsigned char *)buf, input, length);
+       if (written < 0) {
+               free(buf);
+               return NULL;
+       }
+
+       buf[written] = '\0';
+       return buf;
+}
+
+
+// Apply the "relaxed" body canonicalization to a single line, in place, and return that same line.
+// The caller passes the line without its CRLF and is responsible for re-attaching it.
+char *dkim_canonicalize_body_line(char *line) {
+
+       // Whitespace at the end of the line is ignored entirely.
+       dkim_rtrim(line);
+
+       // Every remaining run of spaces and tabs collapses to one SP.
+       // `src` scans the line; `dst` trails behind it and marks where the next surviving character is stored.
+       char *dst = line;
+       int prev_was_space = 0;
+
+       for (char *src = line; *src != '\0'; ++src) {
+               char ch = (*src == '\t') ? ' ' : *src;
+
+               if (ch == ' ' && prev_was_space) {
+                       continue;                                       // second or later blank of a run
+               }
+
+               prev_was_space = (ch == ' ');
+               *dst++ = ch;
+       }
+
+       *dst = '\0';
+       return line;
+}
+
+
+// Canonicalize the message body as per the "relaxed" algorithm
+//
+// body  NUL-terminated message body.  It is read but not modified.  NULL is treated as an empty body.
+//
+// Lines may be terminated by CRLF or by a bare LF; every line of the result ends in CRLF.
+// Returns a newly allocated, NUL-terminated string that the caller must free(), or NULL if memory cannot be allocated.
+char *dkim_canonicalize_body(char *body) {
+       if (body == NULL) {
+               body = "";
+       }
+       size_t body_len = strlen(body);
+
+       // Worst case: every input byte is a bare LF that expands to CRLF, plus the final CRLF and the NUL terminator.
+       if (body_len > ((size_t)-1 - 3) / 2) {
+               return NULL;
+       }
+       char *new_body = malloc(body_len * 2 + 3);
+       if (new_body == NULL) {
+               return NULL;
+       }
+
+       size_t new_body_len = 0;
+       size_t offset = 0;                                              // where the current input line starts
+
+       while (offset < body_len) {
+
+               // The line runs up to the next LF, or to the end of the body if the last line is unterminated.
+               const char *lf = memchr(body + offset, '\n', body_len - offset);
+               size_t line_end = (lf != NULL) ? (size_t)(lf - body) : body_len;
+               size_t next_offset = (lf != NULL) ? line_end + 1 : body_len;
+
+               // A CR directly in front of the LF is part of the line terminator, not of the line.
+               if (lf != NULL && line_end > offset && body[line_end-1] == '\r') {
+                       --line_end;
+               }
+
+               // Copy the raw line straight into the output buffer and canonicalize it there.  Canonicalizing can
+               // only shrink the line, so the output never outgrows the worst-case allocation above.
+               size_t raw_len = line_end - offset;
+               char *line = new_body + new_body_len;
+               memcpy(line, body + offset, raw_len);
+               line[raw_len] = '\0';
+               dkim_canonicalize_body_line(line);
+
+               new_body_len += strlen(line);
+               new_body[new_body_len++] = '\r';
+               new_body[new_body_len++] = '\n';
+
+               offset = next_offset;
+       }
+       new_body[new_body_len] = '\0';
+
+       // Ignores all empty lines at the end of the message body.  "Empty line" is defined in Section 3.4.3.
+       dkim_rtrim_lines(new_body);
+
+       // Note that a completely empty or missing body is canonicalized as a
+       // single "CRLF"; that is, the canonicalized length will be 2 octets.
+       new_body_len = strlen(new_body);
+       new_body[new_body_len++] = '\r';
+       new_body[new_body_len++] = '\n';
+       new_body[new_body_len] = '\0';
+
+       return new_body;
+}
+
+
+// First step to canonicalize a block of headers as per the "relaxed" algorithm.
+// Unfold all headers onto single lines.
+//
+// A folded header continues on the following line, which starts with a space or a tab.  RFC 6376 section 3.4.2
+// requires such a line to be interpreted without the CRLF, but the whitespace remains part of the header value.
+// Each CRLF plus the whitespace character after it is therefore replaced with a single SP, so that
+// "a" CRLF SP "b" becomes "a b".  The CRLF that ends each header field is left alone.
+void dkim_unfold_headers(StrBuf *unfolded_headers) {
+       long search_from = 0;                                           // everything before this offset is already unfolded
+
+       for (;;) {
+               // Fetch the buffer pointer again on every pass rather than caching it across an edit of the StrBuf.
+               char *headers_start = (char *)ChrPtr(unfolded_headers);
+
+               // Find the next CRLF that is followed by a space or a tab.
+               char *fold = NULL;
+               char *p = headers_start + search_from;
+               while ((p = strstr(p, "\r\n")) != NULL) {
+                       if (p[2] == ' ' || p[2] == '\t') {
+                               fold = p;
+                               break;
+                       }
+                       p += 2;
+               }
+               if (fold == NULL) {
+                       break;                                          // no folds left
+               }
+
+               // Replace CRLF<space> or CRLF<tab> with a single space
+               long where = (long)(fold - headers_start);
+               StrBufReplaceToken(unfolded_headers, where, 3, HKEY(" "));
+
+               // Resume just past the space we inserted.  Always moving forward also guarantees that the loop ends.
+               search_from = where + 1;
+       }
+}
+
+
+// Second step to canonicalize a block of headers as per the "relaxed" algorithm.
+// Headers MUST already be unfolded with dkim_unfold_headers()
+//
+// Following RFC 6376 section 3.4.2, for every "name:value" line this
+//   - converts the field name to lower case,
+//   - deletes whitespace immediately before and after the colon,
+//   - converts every run of spaces and tabs inside the value to a single SP, and
+//   - deletes whitespace at the end of the value.
+// The CRLF at the end of each line is kept.  A line that contains no colon is left unchanged.
+void dkim_canonicalize_unfolded_headers(StrBuf *headers) {
+       long pos = 0;                                                   // offset of the line currently being processed
+
+       while (ChrPtr(headers)[pos] != '\0') {
+
+               // We are at the beginning of a line.  Find where it ends and where the colon separator is.
+               char *line = (char *)ChrPtr(headers) + pos;
+               char *eol = strstr(line, "\r\n");
+               long line_len = (eol != NULL) ? (long)(eol - line) : (long)strlen(line);
+               long terminator_len = (eol != NULL) ? 2 : 0;
+               char *end = line + line_len;
+               char *colon = memchr(line, ':', (size_t)line_len);
+
+               if (colon == NULL) {                                    // not a "name:value" line; skip over it
+                       pos += line_len + terminator_len;
+                       continue;
+               }
+
+               // Rewrite the line in place.  `r` reads the original text and `w` writes the canonical text.
+               // `w` never gets ahead of `r`, so nothing is overwritten before it has been read.
+               char *r = line;
+               char *w = line;
+
+               // Convert header field names to all lower case
+               while (r < colon) {
+                       *w++ = (char)tolower((unsigned char)*r++);
+               }
+
+               // remove whitespace before the colon
+               while (w > line && (w[-1] == ' ' || w[-1] == '\t')) {
+                       --w;
+               }
+               *w++ = ':';
+
+               // remove whitespace after the colon
+               r = colon + 1;
+               while (r < end && (*r == ' ' || *r == '\t')) {
+                       ++r;
+               }
+
+               // Copy the value.  A space is emitted only when a non-blank character follows a run of whitespace,
+               // which both collapses each run to one SP and drops whitespace at the end of the value.
+               int pending_space = 0;
+               for (; r < end; ++r) {
+                       if (*r == ' ' || *r == '\t') {
+                               pending_space = 1;
+                       }
+                       else {
+                               if (pending_space) {
+                                       *w++ = ' ';
+                                       pending_space = 0;
+                               }
+                               *w++ = *r;
+                       }
+               }
+
+               // Cut out whatever is left over between the shortened line and its CRLF.
+               long new_len = (long)(w - line);
+               if (new_len < line_len) {
+                       StrBufReplaceToken(headers, pos + new_len, line_len - new_len, HKEY(""));
+               }
+
+               pos += new_len + terminator_len;                        // Advance to the beginning of the next line
+       }
+}
+
+
+// Third step to canonicalize a block of headers as per the "relaxed" algorithm.
+// Reduce the canonicalized header block to only the fields being signed
+//
+// headers      canonicalized header block; every line whose field name is not in `header_list` is deleted from it
+// header_list  colon-separated list of field names to keep, e.g. "from:to:subject".  It is not modified.
+//
+// A field is kept only when its whole name equals an entry of the list, so "to" does not match "to-do".
+// The comparison is case sensitive.
+void dkim_reduce_canonicalized_headers(StrBuf *headers, char *header_list) {
+       const char *wanted = (header_list != NULL) ? header_list : "";
+       long pos = 0;                                                   // offset of the line currently being examined
+
+       while (ChrPtr(headers)[pos] != '\0') {
+
+               // We are at the beginning of a line.  Find the colon separator between field name and value.
+               const char *line = ChrPtr(headers) + pos;
+               const char *eol = strstr(line, "\r\n");
+               size_t content_len = (eol != NULL) ? (size_t)(eol - line) : strlen(line);
+               long line_len = (long)content_len + ((eol != NULL) ? 2 : 0);    // including the CRLF, if there is one
+               const char *colon = memchr(line, ':', content_len);
+
+               // Walk the colon-separated list looking for an entry identical to this field name.
+               int keep_this_header = 0;
+               if (colon != NULL) {
+                       size_t name_len = (size_t)(colon - line);
+                       const char *token = wanted;
+
+                       while (*token != '\0' && !keep_this_header) {
+                               size_t token_len = strcspn(token, ":");
+                               if (token_len > 0 && token_len == name_len && !strncmp(line, token, token_len)) {
+                                       keep_this_header = 1;
+                               }
+                               token += token_len;
+                               if (*token == ':') {
+                                       ++token;
+                               }
+                       }
+               }
+
+               if (keep_this_header) {                                 // Advance to the beginning of the next line
+                       pos += line_len;
+               }
+               else {
+                       // Delete the line.  The next line slides down to `pos`, so `pos` stays where it is.
+                       long length_before = StrLength(headers);
+                       StrBufReplaceToken(headers, pos, line_len, HKEY(""));
+                       if (StrLength(headers) >= length_before) {
+                               break;                                  // nothing was removed; stop rather than loop forever
+                       }
+               }
+       }
+
+}
+
+
+// Make a new header list containing only the headers actually present in the canonicalized header block.
+//
+// header_list       receives the field names joined with ':' (for example "from:to:subject"), NUL-terminated
+// header_list_size  size of the `header_list` buffer in bytes
+// unfolded_headers  header block with one "name:value" line per field; it is only read
+//
+// Lines without a colon are skipped.  A name is never written partially: if the next name does not fit in
+// `header_list`, the list ends with the last name that did.
+void dkim_final_header_list(char *header_list, size_t header_list_size, StrBuf *unfolded_headers) {
+       if (header_list == NULL || header_list_size == 0) {
+               return;
+       }
+       header_list[0] = 0;
+       size_t used = 0;                                                // characters written so far, excluding the NUL
+
+       const char *ptr = ChrPtr(unfolded_headers);
+       while (*ptr) {
+
+               // We are at the beginning of a line.  Find the colon separator between field name and value.
+               const char *eol = strstr(ptr, "\r\n");
+               size_t line_len = (eol != NULL) ? (size_t)(eol - ptr) : strlen(ptr);
+               const char *colon = memchr(ptr, ':', line_len);
+
+               if (colon != NULL) {
+                       size_t name_len = (size_t)(colon - ptr);
+                       size_t separator_len = (used > 0) ? 1 : 0;
+
+                       // Separator + name + NUL terminator must all fit in what is left of the buffer.
+                       if (name_len + separator_len + 1 > header_list_size - used) {
+                               break;
+                       }
+
+                       if (separator_len) {
+                               header_list[used++] = ':';
+                       }
+                       memcpy(header_list + used, ptr, name_len);
+                       used += name_len;
+                       header_list[used] = 0;
+               }
+
+               ptr += line_len + ((eol != NULL) ? 2 : 0);              // Advance to the beginning of the next line
+       }
+}
+
+
+// DKIM-sign an email, supplied as a full RFC2822-compliant message stored in a StrBuf
+//
+// email     the complete message: headers, an empty line, then the body.  On success its content is replaced
+//           with the same message carrying an additional DKIM-Signature header at the end of the header block.
+//           On any failure it is left exactly as it was supplied.
+// pkey_in   private key in PEM format, NUL-terminated
+// domain    signing domain, written to the d= tag
+// selector  key selector, written to the s= tag
+//
+// Headers and body are both canonicalized with the "relaxed" algorithm and the a= tag is always "rsa-sha256".
+// Failures are reported on stderr and the function returns without signing.
+void dkim_sign(StrBuf *email, char *pkey_in, char *domain, char *selector) {
+       StrBuf *header_block = NULL;
+       StrBuf *unfolded_headers = NULL;
+       StrBuf *dkim_header = NULL;
+       char *relaxed_body = NULL;
+       char *encoded_body_hash = NULL;
+       char *sig_b64 = NULL;
+       unsigned char *sig = NULL;
+       size_t slen = 0;
+       BIO *bufio = NULL;
+       EVP_PKEY *pkey = NULL;
+       EVP_MD_CTX *mdctx = NULL;
+       unsigned char body_hash[SHA256_DIGEST_LENGTH];
+       char header_list[] = "from:to:cc:reply-to:subject:date:list-unsubscribe:list-unsubscribe-post";
+       char final_header_list[1024];
+
+       if (!email || !pkey_in || !domain || !selector) {               // nothing to sign, or nothing to sign it with
+               return;
+       }
+
+       // find the break between headers and body
+       const char *msg = ChrPtr(email);
+       size_t msglen = StrLength(email);                               // total length of message (headers + body)
+
+       const char *body_ptr = strstr(msg, "\r\n\r\n");
+       if (body_ptr == NULL) {
+               fprintf(stderr, "dkim: this message cannot be signed because it has no body\n");
+               return;
+       }
+
+       // The supplied message is only read until the signed copy is complete, so every error path below can
+       // simply clean up and return without having to restore anything.
+       size_t body_offset = (size_t)(body_ptr - msg);                  // offset of the CRLFCRLF
+       const char *body = body_ptr + 4;                                // the +4 makes it NOT include the CRLFCRLF
+       size_t body_len = msglen - (body_offset + 4);
+
+       // Apply the "relaxed" canonicalization to the message body
+       relaxed_body = dkim_canonicalize_body((char *)body);
+       if (relaxed_body == NULL) {
+               fprintf(stderr, "dkim: body canonicalization failed\n");
+               goto cleanup;
+       }
+       size_t relaxed_body_len = strlen(relaxed_body);
+
+       // hash of the canonicalized body, base64 encoded
+       // In the test harness email, the body hash should be 2PMBIMGyD2GZlndKI2MRsbhiMr6jD5rCxhq+mCvY7os=
+       SHA256((unsigned char *)relaxed_body, relaxed_body_len, body_hash);
+       encoded_body_hash = dkim_base64_encode(body_hash, SHA256_DIGEST_LENGTH);
+       if (encoded_body_hash == NULL) {
+               fprintf(stderr, "dkim: base64 encoding of the body hash failed\n");
+               goto cleanup;
+       }
+
+       // headers only (the +2 makes it include final CRLF)
+       header_block = NewStrBufPlain(msg, body_offset+2);
+       if (header_block == NULL) {
+               fprintf(stderr, "dkim: out of memory\n");
+               goto cleanup;
+       }
+
+       // "relaxed" header canonicalization, step 1 : unfold the headers
+       unfolded_headers = NewStrBufDup(header_block);
+       if (unfolded_headers == NULL) {
+               fprintf(stderr, "dkim: out of memory\n");
+               goto cleanup;
+       }
+       dkim_unfold_headers(unfolded_headers);
+
+       // "relaxed" header canonicalization, step 2 : lowercase the header names, remove whitespace after the colon
+       dkim_canonicalize_unfolded_headers(unfolded_headers);
+
+       // "relaxed" header canonicalization, step 3 : reduce the canonicalized header block to only the fields being signed
+       dkim_reduce_canonicalized_headers(unfolded_headers, header_list);
+
+       // Make a new header list containing only the ones we actually have.
+       dkim_final_header_list(final_header_list, sizeof(final_header_list), unfolded_headers);
+
+       // create DKIM header
+       time_t now = time(NULL);                                        // signature timestamp
+       dkim_header = NewStrBuf();
+       if (dkim_header == NULL) {
+               fprintf(stderr, "dkim: out of memory\n");
+               goto cleanup;
+       }
+       StrBufPrintf(dkim_header,
+               "v=1; a=rsa-sha256; s=%s; d=%s; l=%lu; t=%lld; c=relaxed/relaxed; h=%s; bh=%s; b=",
+               selector,
+               domain,
+               (unsigned long)relaxed_body_len,
+               (long long)now,                                         // time_t has no printf conversion of its own
+               final_header_list,
+               encoded_body_hash
+       );
+
+       // Add the initial DKIM header (which is still missing the value after "b=") to the headers to be signed.
+       // As far as I can tell, RFC6376 does *not* want us to include any "\r\n" after "b="
+       StrBufAppendBufPlain(unfolded_headers, HKEY("dkim-signature:"), 0);
+       StrBufAppendBuf(unfolded_headers, dkim_header, 0);
+
+       // Load the private key into an OpenSSL "BIO" structure.  A length of -1 means "NUL-terminated string".
+       bufio = BIO_new_mem_buf((void *)pkey_in, -1);
+       if (bufio == NULL) {
+               fprintf(stderr, "dkim: BIO_new_mem_buf() failed\n");
+               goto cleanup;
+       }
+
+       // Now import the private key
+       pkey = PEM_read_bio_PrivateKey(
+               bufio,                          // BIO to read the private key from
+               NULL,                           // no existing EVP_PKEY to reuse; a new one is returned
+               NULL,                           // password callback - can be NULL
+               NULL                            // parameter passed to callback or password if callback is NULL
+       );
+       if (pkey == NULL) {
+               fprintf(stderr, "dkim: PEM_read_bio_PrivateKey() failed\n");
+               goto cleanup;
+       }
+
+       // Everything I ever needed to know I learned from https://wiki.openssl.org/index.php/EVP_Signing_and_Verifying
+
+       // Create the Message Digest Context
+       mdctx = EVP_MD_CTX_new();
+       if (mdctx == NULL) {
+               fprintf(stderr, "dkim: EVP_MD_CTX_create() failed\n");
+               goto cleanup;
+       }
+
+       // Initialize the DigestSign operation using SHA-256 algorithm
+       if (EVP_DigestSignInit(mdctx, NULL, EVP_sha256(), NULL, pkey) != 1) {
+               fprintf(stderr, "dkim: EVP_DigestSignInit() failed\n");
+               goto cleanup;
+       }
+
+       // Call update with the "message" (the canonicalized headers).  EVP_DigestSign computes the SHA-256 digest
+       // itself, so it must be given the header text and not a hash of it; otherwise the data would be hashed
+       // twice and the signature would not match what a verifier computes.
+       if (EVP_DigestSignUpdate(mdctx, ChrPtr(unfolded_headers), StrLength(unfolded_headers)) != 1) {
+               fprintf(stderr, "dkim: EVP_DigestSignUpdate() failed\n");
+               goto cleanup;
+       }
+
+       // Finalize the DigestSign operation.
+
+       // First call EVP_DigestSignFinal with a NULL sig parameter to obtain the length of the signature.
+       // Length is returned in slen
+       if (EVP_DigestSignFinal(mdctx, NULL, &slen) != 1) {
+               fprintf(stderr, "dkim: EVP_DigestSignFinal() failed\n");
+               goto cleanup;
+       }
+
+       // Allocate memory for the signature based on size in slen
+       sig = OPENSSL_malloc(slen);
+       if (sig == NULL) {
+               fprintf(stderr, "dkim: OPENSSL_malloc() failed\n");
+               goto cleanup;
+       }
+
+       // Obtain the signature
+       if (EVP_DigestSignFinal(mdctx, sig, &slen) != 1) {
+               fprintf(stderr, "dkim: EVP_DigestSignFinal() failed\n");
+               goto cleanup;
+       }
+
+       // base64 encode the signature
+       sig_b64 = dkim_base64_encode(sig, (int)slen);
+       if (sig_b64 == NULL) {
+               fprintf(stderr, "dkim: base64 encoding of the signature failed\n");
+               goto cleanup;
+       }
+
+       // We should probably wrap the dkim signature here.
+
+       // Add the signature to the original header block.  Also append the extra CRLF indicating end of headers.
+       StrBufAppendPrintf(header_block, "DKIM-Signature: %s%s\r\n\r\n", (char *)ChrPtr(dkim_header), sig_b64);
+
+       // Append the body, then hand the finished message to the caller by swapping it into `email`.
+       // After the swap `header_block` holds the old, unsigned message, which is released below.
+       StrBufAppendBufPlain(header_block, body, (long)body_len, 0);
+       SwapBuffers(header_block, email);
+
+       // And we're done!
+
+cleanup:
+       EVP_MD_CTX_free(mdctx);
+       EVP_PKEY_free(pkey);
+       BIO_free(bufio);
+       OPENSSL_free(sig);                                              // came from OPENSSL_malloc(), so not free()
+       free(sig_b64);
+       free(encoded_body_hash);
+       free(relaxed_body);
+       if (dkim_header != NULL) {
+               FreeStrBuf(&dkim_header);
+       }
+       if (unfolded_headers != NULL) {
+               FreeStrBuf(&unfolded_headers);
+       }
+       if (header_block != NULL) {
+               FreeStrBuf(&header_block);
+       }
+}
+
+
+// A private key in PEM format, embedded directly in the source as a string constant.
+// Nothing in this file refers to it.
+// NOTE(review): presumably a throwaway key for exercising dkim_sign() while the module is not yet integrated --
+// confirm that it never signs real mail, and move it out of the source tree before this code is put to use.
+char *private_key =
+       "-----BEGIN PRIVATE KEY-----\n"
+       "MIIEvAIBADANBgkqhkiG9w0BAQEFAASCBKYwggSiAgEAAoIBAQDfuefcepokRrnp\n"
+       "SSDsxu+QDqeD8GL9QnZz/N6IxTdBv6Wc10ExBe2IjS5dKI7AvhSSEK0zGE8Hkpmw\n"
+       "eccbiepQqeueteWzAMZ1uT43bD3k7eye7vWobiOP9QtoYGR6sG25h2W5Tbc91W4f\n"
+       "dvYnxYVJjx8wIVF0f3o25v+rQueoo0HlvGyA9/xi9GAaJL05OmK1xnMJgSvW/Q8Q\n"
+       "zq7apf1D6XPXHuhv5tevElkZ5jlvM2w0cTVyAzMrUh6Rkcn9xM4/NPWYghBc3jO4\n"
+       "TrPnSrobQGrX0fcizE/FN6I0in0Ke8Z+gMM8NeFcsjvLZe9MpY9i0pw/ygLIh5t3\n"
+       "O4qpwC1JAgMBAAECggEAIwiTCMEAGzciDKhhagJ66BWLYMtHTP5X2zDZThSH4xlW\n"
+       "HznL4RfbCtuEy5y6we7h/L90x8ACPB7WRz7CkYrmsMvy9A7q0b2I1k10MyyVgqBJ\n"
+       "QdgMitv4YKYQK7+QbG/tNrS/lqVXUOz3iiDQSgkRpqOtUBWfkj0WD7vbhF99NDhV\n"
+       "dxaehFkKv3yNy0bXJlHJBJ6KtOUnDwub8TExh8dyj3kB8Qzj4I98shaXPNUSSaOw\n"
+       "zG6QG72yrxlMs495jkIPbF2JDidmLrX+oVISwKyaBWx+BkFV/KFAEKgaB5/nCw7+\n"
+       "qq/jxsmXim3HuQ3MIAjq1yw9aGRH1HMi8Gn7tYlNGwKBgQDy6EEKpuEiW9wwlI2+\n"
+       "GVuSkhSTTX1h6qK/ay8Jtyb8yJM/BxogAQlfjdgFixiZHy5MaomTbfeT2GDji553\n"
+       "+RsnZ60+g7FI9nHwabSxtuCQ+vjbFqCsdMPAiSeG0bEzo0zf5TjASdUtuZL0vXjl\n"
+       "yMZWDEuESoVNlYlvCOVkw2nvIwKBgQDryPuSq6PNVHRWsKRRs5ju4wKs/1ucBOg5\n"
+       "gCcN8lE03mFCWAlZhypE4/fAhTQ/a5KQoAzc0QZcXRueDyNsnc+QWw3/QWf8/fkV\n"
+       "HPfTWS3Dcuj+4RnWUucaZ/mKFlTC3+eNSlpyaPIMlCjXGsJ9GlPrsaAi9KPbD2v/\n"
+       "XcMq/PMOowKBgHVf7S3sfZVQthFzdxqIvksQ84hKRW/vJT1B2bTkH56+fQhTsjgM\n"
+       "yC64J85l7DjxbDnYsSngVWXHhOnvKV/nq0tbOcefcydCjsQREBNfvxvPajjTskgj\n"
+       "FAQRQlxPL0U4f4khBk9EXhJ+PZithaHjZpNl1YfTSp62x3Yz4kTSeHnpAoGAGn5m\n"
+       "5kArE7NdrzACBrwrfww7DL1Uyd8zSOLBgKutvEcQnqfNxSWO9la3TAarrESmH2Ic\n"
+       "j+Nc15wOsl/5FwdUf1/73qa2zJKtHlY28qSeo8uRqrIYeSCvnyP3wjBoLc2C8zlb\n"
+       "mGd6azdqr2DuYahHrcAzwjnC/6Zn+DXM7FOn7AkCgYBp1xxY88cCoF24yffkD3MC\n"
+       "ACUury4qRSDTGx6/qCCkIyWxg1vuiDrlPWhSwQznxHvovcfpdjdbWcFY87IK6mpG\n"
+       "aJHwMJ7Kw+baoxGPZWHwdg6BgvUCihe3xlcaq6rOBoLviD6FOzbogg++Tvi0LemG\n"
+       "y/wEs/mZkaRzW4n41ir0Xw==\n"
+       "-----END PRIVATE KEY-----\n"
+;
+
author	Art Cancro <ajc@citadel.org>
	Wed, 8 May 2024 17:15:17 +0000 (10:15 -0700)
committer	Art Cancro <ajc@citadel.org>
	Wed, 8 May 2024 17:15:17 +0000 (10:15 -0700)