add functions to escape utf8 sequences for xml streams
[citadel.git] / libcitadel / lib / stringbuf.c
index eb751c8d70d0931e31abae11b3a4b7d8f638dc94..dd15b0801807788bff5a248e45c5584253cfb86e 100644 (file)
@@ -1925,6 +1925,129 @@ void StrBufUrlescUPAppend(StrBuf *OutBuf, const StrBuf *In, const char *PlainIn)
        *pt = '\0';
 }
 
+/**
+ * @ingroup StrBuf_DeEnCoder
+ * @brief append a string to the buffer, escaping characters that have a special meaning in xml
+ *
+ * '<', '>' and '&' are written as "&lt;", "&gt;" and "&amp;"; bytes from 0x20 to 0x7F
+ * are copied unchanged. For bytes >= 0x80 Ctdl_GetUtf8SequenceLength() is asked; if it
+ * reports a sequence, that many bytes are copied unchanged, otherwise the byte is hex escaped.
+ * A hex escaped byte is written as '&', the two characters HexList holds for it, and '&'.
+ *
+ * @param OutBuf the output buffer; nothing is done if it is NULL
+ * @param In Buffer to encode; only looked at if PlainIn is NULL
+ * @param PlainIn way in from plain old c strings; takes precedence over In
+ * @param PlainInLen length of PlainIn; maybe you've got binary data or know the length?
+ *                   a negative value makes us use strlen() on PlainIn
+ * @param OverrideLowChars non-zero: chars < 0x20 are replaced by '_'; zero: they are hex escaped
+ */
+void StrBufXMLEscAppend(StrBuf *OutBuf,
+                       const StrBuf *In,
+                       const char *PlainIn,
+                       long PlainInLen,
+                       int OverrideLowChars)
+{
+       const char *pch, *pche;
+       char *pt, *pte;
+       unsigned char ch;
+       int IsUtf8Sequence;
+       long len;
+
+       if (((In == NULL) && (PlainIn == NULL)) || (OutBuf == NULL) )
+               return;
+       if (PlainIn != NULL) {
+               if (PlainInLen < 0)
+                       len = (long)strlen(PlainIn);
+               else
+                       len = PlainInLen;
+               pch = PlainIn;
+       }
+       else {
+               pch = (const char*)In->buf;
+               len = In->BufUsed;
+       }
+
+       if (len == 0)
+               return;
+       pche = pch + len;
+
+       pt = OutBuf->buf + OutBuf->BufUsed;
+       /* we max append 6 chars at once plus the \0 */
+       pte = OutBuf->buf + OutBuf->BufSize - 6;
+
+       while (pch < pche) {
+               if (pt >= pte) {
+                       /* out of headroom: publish what we wrote, grow, and
+                        * recompute both cursors from the (possibly replaced) buffer */
+                       OutBuf->BufUsed = pt - OutBuf->buf;
+                       IncreaseBuf(OutBuf, 1, -1);
+                       pte = OutBuf->buf + OutBuf->BufSize - 6;
+                       pt = OutBuf->buf + OutBuf->BufUsed;
+               }
+
+               /* classify the byte as unsigned: with a signed char every byte
+                * >= 0x80 is negative and would be mistaken for a control char,
+                * so the utf8 branch below would never be reached. */
+               ch = (unsigned char)*pch;
+
+               if (ch == '<') {
+                       memcpy(pt, HKEY("&lt;"));
+                       pt += 4;
+                       pch ++;
+               }
+               else if (ch == '>') {
+                       memcpy(pt, HKEY("&gt;"));
+                       pt += 4;
+                       pch ++;
+               }
+               else if (ch == '&') {
+                       memcpy(pt, HKEY("&amp;"));
+                       pt += 5;
+                       pch ++;
+               }
+               else if ((ch >= 0x20) && (ch <= 0x7F)) {
+                       *pt = *pch;
+                       pt ++; pch ++;
+               }
+               else if (ch < 0x20) {
+                       /* we probably shouldn't be doing this */
+                       if (OverrideLowChars)
+                       {
+                               *pt = '_';
+                               pt ++;
+                               pch ++;
+                       }
+                       else
+                       {
+                               /* NOTE(review): "&XX&" is not a well formed xml
+                                * character reference (that would be "&#xXX;");
+                                * the format is kept as it was - confirm what
+                                * the consumers expect. */
+                               *pt = '&';
+                               pt ++;
+                               *pt = HexList[ch][0];
+                               pt ++;
+                               *pt = HexList[ch][1];
+                               pt ++;
+                               *pt = '&';
+                               pt ++;
+                               /* consume exactly the one byte we escaped */
+                               pch ++;
+                       }
+               }
+               else {
+                       IsUtf8Sequence = Ctdl_GetUtf8SequenceLength(pch, pche);
+                       if (IsUtf8Sequence > 0)
+                       {
+                               /* copy the sequence verbatim, but never read past the input.
+                                * NOTE(review): assumes a sequence is at most 6 bytes,
+                                * which is all the headroom reserved above - confirm. */
+                               while ((IsUtf8Sequence > 0) && (pch < pche)) {
+                                       *pt = *pch;
+                                       pt ++;
+                                       pch ++;
+                                       --IsUtf8Sequence;
+                               }
+                       }
+                       else
+                       {
+                               /* not a sequence: hex escape the single byte
+                                * (same "&XX&" format as for the low chars above) */
+                               *pt = '&';
+                               pt ++;
+                               *pt = HexList[ch][0];
+                               pt ++;
+                               *pt = HexList[ch][1];
+                               pt ++;
+                               *pt = '&';
+                               pt ++;
+                               pch ++;
+                       }
+               }
+       }
+       *pt = '\0';
+       OutBuf->BufUsed = pt - OutBuf->buf;
+}
+
+
 /** 
  * @ingroup StrBuf_DeEnCoder
  * @brief append a string in hex encoding to the buffer
@@ -2882,6 +3005,98 @@ int StrBufRFC2047encode(StrBuf **target, const StrBuf *source)
        return (*target)->BufUsed;;
 }
 
+/**
+ * @ingroup StrBuf_DeEnCoder
+ * @brief      Quoted-Printable encode a message; make it < 80 columns width.
+ *
+ * Carriage returns are dropped, line feeds are written as "=0A", the
+ * characters 32-60 and 62-126 are copied unchanged and every other byte is
+ * written as '=' plus the two characters HexList holds for it. Once more than
+ * 72 columns are used, a soft line break ("=" followed by a line feed) is inserted.
+ * @param      EncodeMe        Source string to be encoded; NULL is treated like an empty string.
+ * @returns     buffer with encoded message, obtained from NewStrBufPlain()
+ *              (presumably to be released by the caller); NULL if that failed.
+ */
+StrBuf *StrBufRFC2047encodeMessage(const StrBuf *EncodeMe)
+{
+       StrBuf *OutBuf;
+       char *Optr, *OEptr;
+       const char *ptr, *eptr;
+       unsigned char ch;
+       int LinePos;
+
+       OutBuf = NewStrBufPlain(NULL, (EncodeMe != NULL) ? StrLength(EncodeMe) * 4 : 0);
+       if (OutBuf == NULL)
+               return NULL;
+
+       Optr = OutBuf->buf;
+       OEptr = OutBuf->buf + OutBuf->BufSize;
+       if ((EncodeMe != NULL) && (EncodeMe->buf != NULL)) {
+               ptr = EncodeMe->buf;
+               eptr = EncodeMe->buf + EncodeMe->BufUsed;
+       }
+       else {
+               /* nothing to encode; the loop below is skipped */
+               ptr = eptr = NULL;
+       }
+       LinePos = 0;
+
+       while (ptr < eptr)
+       {
+               /* one pass writes at most 3 chars for the input byte plus 2 for
+                * a soft line break, and the final \0 has to fit too. */
+               if (Optr + 6 >= OEptr)
+               {
+                       long Offset;
+                       Offset = Optr - OutBuf->buf;
+                       OutBuf->BufUsed = Offset;
+                       IncreaseBuf(OutBuf, 1, 0);
+                       Optr = OutBuf->buf + Offset;
+                       OEptr = OutBuf->buf + OutBuf->BufSize;
+               }
+
+               ch = (unsigned char)*ptr;
+
+               if (ch == '\r')
+               {
+                       /* ignore carriage returns */
+                       ptr ++;
+               }
+               else if (ch == '\n') {
+                       /* hard line break: keep it in encoded form. No physical
+                        * newline is written, so the column keeps counting. */
+                       memcpy(Optr, HKEY("=0A"));
+                       Optr += 3;
+                       LinePos += 3;
+                       ptr ++;
+               }
+               else if (( (ch >= 32) && (ch <= 60) ) ||
+                        ( (ch >= 62) && (ch <= 126) ))
+               {
+                       /* printable and not '=' (61): copy as is */
+                       *Optr = (char)ch;
+                       Optr ++;
+                       ptr ++;
+                       LinePos ++;
+               }
+               else {
+                       *Optr = '=';
+                       Optr ++;
+                       *Optr = HexList[ch][0];
+                       Optr ++;
+                       *Optr = HexList[ch][1];
+                       Optr ++;
+                       LinePos += 3;
+                       ptr ++;
+               }
+
+               if (LinePos > 72) {
+                       /* soft line break */
+                       if (isspace((unsigned char)*(Optr - 1))) {
+                               /* whitespace must not end a line unencoded:
+                                * replace it by its =XX form */
+                               ch = (unsigned char)*(Optr - 1);
+                               Optr --;
+                               *Optr = '=';
+                               Optr ++;
+                               *Optr = HexList[ch][0];
+                               Optr ++;
+                               *Optr = HexList[ch][1];
+                               Optr ++;
+                       }
+                       *Optr = '=';
+                       Optr ++;
+                       *Optr = '\n';
+                       Optr ++;
+                       LinePos = 0;
+               }
+       }
+       *Optr = '\0';
+       OutBuf->BufUsed = Optr - OutBuf->buf;
+
+       return OutBuf;
+}
 
 
 static void AddRecipient(StrBuf *Target,