upgrade package vmime

This commit is contained in:
Mark Brand 2010-10-12 23:54:31 +02:00
parent 90e600677d
commit a00407d242
1 changed files with 544 additions and 0 deletions

View File

@ -17671,3 +17671,547 @@ index 4e2c9a7..f2d42b6 100644
// TODO: UUEncode
VMIME_TEST_SUITE_END
commit fb268637f2f06d710f0ef475d71a8d4034a28e6c
Author: vincent-richard <vincent-richard@5301114d-f842-0410-bbdd-996ee0417009>
Date: Tue Oct 12 17:10:58 2010 +0000
Better RFC-2047 encoding.
git-svn-id: https://vmime.svn.sourceforge.net/svnroot/vmime/trunk@567 5301114d-f842-0410-bbdd-996ee0417009
diff --git a/src/utility/encoder/qpEncoder.cpp b/src/utility/encoder/qpEncoder.cpp
index e20be9f..aa95022 100644
--- a/src/utility/encoder/qpEncoder.cpp
+++ b/src/utility/encoder/qpEncoder.cpp
@@ -51,10 +51,52 @@ const std::vector <string> qpEncoder::getAvailableProperties() const
-// Encoding table
+// Hex-encoding table
const unsigned char qpEncoder::sm_hexDigits[] = "0123456789ABCDEF";
-// Decoding table
+
+// RFC-2047 encoding table: we always encode RFC-2047 using the restricted
+// charset, that is the one used for 'phrase' in From/To/Cc/... headers.
+//
+// " The set of characters that may be used in a "Q"-encoded 'encoded-word'
+// is restricted to: <upper and lower case ASCII letters, decimal digits,
+// "!", "*", "+", "-", "/", "=", and "_" (underscore, ASCII 95.)>. "
+//
+// Two special cases:
+// - encode space (32) as underscore (95)
+// - encode underscore as hex (=5F)
+//
+// This is a quick lookup table:
+// '1' means "encode", '0' means "no encoding"
+//
+const unsigned char qpEncoder::sm_RFC2047EncodeTable[] =
+{
+ /* 0 NUL */ 1, /* 1 SOH */ 1, /* 2 STX */ 1, /* 3 ETX */ 1, /* 4 EOT */ 1, /* 5 ENQ */ 1,
+ /* 6 ACK */ 1, /* 7 BEL */ 1, /* 8 BS */ 1, /* 9 TAB */ 1, /* 10 LF */ 1, /* 11 VT */ 1,
+ /* 12 FF */ 1, /* 13 CR */ 1, /* 14 SO */ 1, /* 15 SI */ 1, /* 16 DLE */ 1, /* 17 DC1 */ 1,
+ /* 18 DC2 */ 1, /* 19 DC3 */ 1, /* 20 DC4 */ 1, /* 21 NAK */ 1, /* 22 SYN */ 1, /* 23 ETB */ 1,
+ /* 24 CAN */ 1, /* 25 EM */ 1, /* 26 SUB */ 1, /* 27 ESC */ 1, /* 28 FS */ 1, /* 29 GS */ 1,
+ /* 30 RS */ 1, /* 31 US */ 1, /* 32 SPACE*/ 1, /* 33 ! */ 0, /* 34 " */ 1, /* 35 # */ 1,
+ /* 36 $ */ 1, /* 37 % */ 1, /* 38 & */ 1, /* 39 ' */ 1, /* 40 ( */ 1, /* 41 ) */ 1,
+ /* 42 * */ 0, /* 43 + */ 0, /* 44 , */ 1, /* 45 - */ 0, /* 46 . */ 1, /* 47 / */ 0,
+ /* 48 0 */ 0, /* 49 1 */ 0, /* 50 2 */ 0, /* 51 3 */ 0, /* 52 4 */ 0, /* 53 5 */ 0,
+ /* 54 6 */ 0, /* 55 7 */ 0, /* 56 8 */ 0, /* 57 9 */ 0, /* 58 : */ 1, /* 59 ; */ 1,
+ /* 60 < */ 1, /* 61 = */ 1, /* 62 > */ 1, /* 63 ? */ 1, /* 64 @ */ 1, /* 65 A */ 0,
+ /* 66 B */ 0, /* 67 C */ 0, /* 68 D */ 0, /* 69 E */ 0, /* 70 F */ 0, /* 71 G */ 0,
+ /* 72 H */ 0, /* 73 I */ 0, /* 74 J */ 0, /* 75 K */ 0, /* 76 L */ 0, /* 77 M */ 0,
+ /* 78 N */ 0, /* 79 O */ 0, /* 80 P */ 0, /* 81 Q */ 0, /* 82 R */ 0, /* 83 S */ 0,
+ /* 84 T */ 0, /* 85 U */ 0, /* 86 V */ 0, /* 87 W */ 0, /* 88 X */ 0, /* 89 Y */ 0,
+ /* 90 Z */ 0, /* 91 [ */ 1, /* 92 \ */ 1, /* 93 ] */ 1, /* 94 ^ */ 1, /* 95 _ */ 1,
+ /* 96 ` */ 1, /* 97 a */ 0, /* 98 b */ 0, /* 99 c */ 0, /* 100 d */ 0, /* 101 e */ 0,
+ /* 102 f */ 0, /* 103 g */ 0, /* 104 h */ 0, /* 105 i */ 0, /* 106 j */ 0, /* 107 k */ 0,
+ /* 108 l */ 0, /* 109 m */ 0, /* 110 n */ 0, /* 111 o */ 0, /* 112 p */ 0, /* 113 q */ 0,
+ /* 114 r */ 0, /* 115 s */ 0, /* 116 t */ 0, /* 117 u */ 0, /* 118 v */ 0, /* 119 w */ 0,
+ /* 120 x */ 0, /* 121 y */ 0, /* 122 z */ 0, /* 123 { */ 1, /* 124 | */ 1, /* 125 } */ 1,
+ /* 126 ~ */ 1, /* 127 DEL */ 1
+};
+
+
+// Hex-decoding table
const unsigned char qpEncoder::sm_hexDecodeTable[256] =
{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -76,6 +118,36 @@ const unsigned char qpEncoder::sm_hexDecodeTable[256] =
};
+// static -- true when 'c' cannot be written as-is in an RFC-2047 "Q" encoded-word
+bool qpEncoder::RFC2047_isEncodingNeededForChar(const unsigned char c)
+{
+ return (c >= 128 || sm_RFC2047EncodeTable[c] != 0); // range test first: the table is only indexed when c < 128
+}
+
+
+// static -- number of output characters 'c' occupies once RFC-2047 "Q"-encoded (1 or 3)
+int qpEncoder::RFC2047_getEncodedLength(const unsigned char c)
+{
+ if (c >= 128 || sm_RFC2047EncodeTable[c] != 0)
+ {
+ if (c == 32) // space
+ {
+ // Encoded as "_"
+ return 1;
+ }
+ else
+ {
+ // Hex encoding: '=' followed by two hexadecimal digits
+ return 3;
+ }
+ }
+ else
+ {
+ return 1; // no encoding
+ }
+}
+
+
#ifndef VMIME_BUILDING_DOC
#define QP_ENCODE_HEX(x) \
@@ -83,7 +155,7 @@ const unsigned char qpEncoder::sm_hexDecodeTable[256] =
outBuffer[outBufferPos + 1] = sm_hexDigits[x >> 4]; \
outBuffer[outBufferPos + 2] = sm_hexDigits[x & 0xF]; \
outBufferPos += 3; \
- curCol += 3;
+ curCol += 3
#define QP_WRITE(s, x, l) s.write(reinterpret_cast <utility::stream::value_type*>(x), l)
@@ -145,34 +217,51 @@ utility::stream::size_type qpEncoder::encode(utility::inputStream& in,
// Get the next char and encode it
const unsigned char c = static_cast <unsigned char>(buffer[bufferPos++]);
- switch (c)
- {
- case '.':
+ if (rfc2047)
{
- if (!rfc2047 && curCol == 0)
+ if (c >= 128 || sm_RFC2047EncodeTable[c] != 0)
{
- // If a '.' appears at the beginning of a line, we encode it to
- // to avoid problems with SMTP servers... ("\r\n.\r\n" means the
- // end of data transmission).
- QP_ENCODE_HEX('.')
- continue;
+ if (c == 32) // space
+ {
+ // RFC-2047, Page 5, 4.2. The "Q" encoding:
+ // << The 8-bit hexadecimal value 20 (e.g., ISO-8859-1 SPACE) may be
+ // represented as "_" (underscore, ASCII 95.). >>
+ outBuffer[outBufferPos++] = '_';
+ ++curCol;
+ }
+ else
+ {
+ // Other characters: '=' + hexadecimal encoding
+ QP_ENCODE_HEX(c);
+ }
+ }
+ else
+ {
+ // No encoding
+ outBuffer[outBufferPos++] = c;
+ ++curCol;
}
-
- outBuffer[outBufferPos++] = '.';
- ++curCol;
- break;
}
- case ' ':
+ else
{
- // RFC-2047, Page 5, 4.2. The "Q" encoding:
- // << The 8-bit hexadecimal value 20 (e.g., ISO-8859-1 SPACE) may be
- // represented as "_" (underscore, ASCII 95.). >>
- if (rfc2047)
+ switch (c)
{
- outBuffer[outBufferPos++] = '_';
+ case 46: // .
+ {
+ if (curCol == 0)
+ {
+ // If a '.' appears at the beginning of a line, we encode it to
+ // avoid problems with SMTP servers... ("\r\n.\r\n" means the
+ // end of data transmission).
+ QP_ENCODE_HEX('.');
+ continue;
+ }
+
+ outBuffer[outBufferPos++] = '.';
++curCol;
+ break;
}
- else
+ case 32: // space
{
// Need to get more data?
if (bufferPos >= bufferLength)
@@ -192,100 +281,74 @@ utility::stream::size_type qpEncoder::encode(utility::inputStream& in,
outBuffer[outBufferPos++] = ' ';
++curCol;
}
- }
- break;
- }
- case '\t':
- {
- QP_ENCODE_HEX(c)
- break;
- }
- case '\r':
- case '\n':
- {
- // Text mode (where using CRLF or LF or ... does not
- // care for a new line...)
- if (text)
- {
- outBuffer[outBufferPos++] = c;
- ++curCol;
+ break;
}
- // Binary mode (where CR and LF bytes are important!)
- else
+ case 9: // TAB
{
- QP_ENCODE_HEX(c)
- }
-
- break;
- }
- case '=':
- {
- QP_ENCODE_HEX('=')
- break;
- }
- // RFC-2047 'especials' characters
- case ',':
- case ';':
- case ':':
- case '_':
- case '@':
- case '(':
- case ')':
- case '<':
- case '>':
- case '[':
- case ']':
- case '"':
- {
- if (rfc2047)
- {
- QP_ENCODE_HEX(c)
+ QP_ENCODE_HEX(c);
+ break;
}
- else
+ case 13: // CR
+ case 10: // LF
{
- outBuffer[outBufferPos++] = c;
- ++curCol;
- }
+ // Text mode (where it does not matter whether CRLF, LF or ...
+ // is used for a new line)
+ if (text)
+ {
+ outBuffer[outBufferPos++] = c;
+ ++curCol;
+ }
+ // Binary mode (where CR and LF bytes are important!)
+ else
+ {
+ QP_ENCODE_HEX(c);
+ }
- break;
- }
- /*
- Rule #2: (Literal representation) Octets with decimal values of 33
- through 60 inclusive, and 62 through 126, inclusive, MAY be
- represented as the ASCII characters which correspond to those
- octets (EXCLAMATION POINT through LESS THAN, and GREATER THAN
- through TILDE, respectively).
- */
- default:
- {
- //if ((c >= 33 && c <= 60) || (c >= 62 && c <= 126))
- if (c >= 33 && c <= 126 && c != 61 && c != 63)
- {
- outBuffer[outBufferPos++] = c;
- ++curCol;
+ break;
}
- // Other characters: '=' + hexadecimal encoding
- else
+ case 61: // =
{
- QP_ENCODE_HEX(c)
+ QP_ENCODE_HEX('=');
+ break;
}
+ /*
+ Rule #2: (Literal representation) Octets with decimal values of 33
+ through 60 inclusive, and 62 through 126, inclusive, MAY be
+ represented as the ASCII characters which correspond to those
+ octets (EXCLAMATION POINT through LESS THAN, and GREATER THAN
+ through TILDE, respectively).
+ */
+ default:
+
+ //if ((c >= 33 && c <= 60) || (c >= 62 && c <= 126))
+ if (c >= 33 && c <= 126 && c != 61 && c != 63)
+ {
+ outBuffer[outBufferPos++] = c;
+ ++curCol;
+ }
+ // Other characters: '=' + hexadecimal encoding
+ else
+ {
+ QP_ENCODE_HEX(c);
+ }
- break;
- }
+ break;
- }
+ } // switch (c)
- // Soft line break : "=\r\n"
- if (cutLines && curCol >= maxLineLength - 1)
- {
- outBuffer[outBufferPos] = '=';
- outBuffer[outBufferPos + 1] = '\r';
- outBuffer[outBufferPos + 2] = '\n';
+ // Soft line break : "=\r\n"
+ if (cutLines && curCol >= maxLineLength - 1)
+ {
+ outBuffer[outBufferPos] = '=';
+ outBuffer[outBufferPos + 1] = '\r';
+ outBuffer[outBufferPos + 2] = '\n';
- outBufferPos += 3;
- curCol = 0;
- }
+ outBufferPos += 3;
+ curCol = 0;
+ }
+
+ } // !rfc2047
++inTotal;
diff --git a/src/wordEncoder.cpp b/src/wordEncoder.cpp
index 22994ed..67bd7a1 100644
--- a/src/wordEncoder.cpp
+++ b/src/wordEncoder.cpp
@@ -150,29 +150,9 @@ const string wordEncoder::getNextChunk(const string::size_type maxLength)
while ((inputCount == 0 || outputCount < maxLength) && (inputCount < remaining))
{
const unsigned char c = m_buffer[m_pos + inputCount];
- bool encoded = true;
-
- switch (c)
- {
- case ',':
- case ';':
- case ':':
- case '_':
- case '=':
-
- encoded = true;
- break;
-
- default:
-
- if (c >= 33 && c <= 126 && c != 61)
- encoded = false;
-
- break;
- }
inputCount++;
- outputCount += (encoded ? 3 : 1);
+ outputCount += utility::encoder::qpEncoder::RFC2047_getEncodedLength(c);
}
// Encode chunk
@@ -217,28 +197,7 @@ const string wordEncoder::getNextChunk(const string::size_type maxLength)
for (string::size_type i = 0, n = encodeBytes.length() ; i < n ; ++i)
{
const unsigned char c = encodeBytes[i];
- bool encoded = true;
-
- switch (c)
- {
- case ',':
- case ';':
- case ':':
- case '_':
- case '=':
-
- encoded = true;
- break;
-
- default:
-
- if (c >= 33 && c <= 126 && c != 61)
- encoded = false;
-
- break;
- }
-
- outputCount += (encoded ? 3 : 1);
+ outputCount += utility::encoder::qpEncoder::RFC2047_getEncodedLength(c);
}
}
diff --git a/vmime/utility/encoder/qpEncoder.hpp b/vmime/utility/encoder/qpEncoder.hpp
index 098b4c8..a969126 100644
--- a/vmime/utility/encoder/qpEncoder.hpp
+++ b/vmime/utility/encoder/qpEncoder.hpp
@@ -47,10 +47,14 @@ public:
const std::vector <string> getAvailableProperties() const;
+ static bool RFC2047_isEncodingNeededForChar(const unsigned char c);
+ static int RFC2047_getEncodedLength(const unsigned char c);
+
protected:
static const unsigned char sm_hexDigits[17];
static const unsigned char sm_hexDecodeTable[256];
+ static const unsigned char sm_RFC2047EncodeTable[128];
};
commit 3c46d1a864399d924a4a7c8a0cfdd348ecfd5fbc
Author: vincent-richard <vincent-richard@5301114d-f842-0410-bbdd-996ee0417009>
Date: Tue Oct 12 20:01:34 2010 +0000
Fixed missing whitespace in text parsing.
git-svn-id: https://vmime.svn.sourceforge.net/svnroot/vmime/trunk@568 5301114d-f842-0410-bbdd-996ee0417009
diff --git a/src/text.cpp b/src/text.cpp
index a2fe060..2454456 100644
--- a/src/text.cpp
+++ b/src/text.cpp
@@ -299,6 +299,12 @@ void text::createFromString(const string& in, const charset& ch)
}
else
{
+ if (count)
+ {
+ ref <word> w = getWordAt(getWordCount() - 1);
+ w->getBuffer() += ' ';
+ }
+
appendWord(vmime::create <word>(chunk, ch));
prevIs8bit = true;
@@ -314,6 +320,12 @@ void text::createFromString(const string& in, const charset& ch)
}
else
{
+ if (count)
+ {
+ ref <word> w = getWordAt(getWordCount() - 1);
+ w->getBuffer() += ' ';
+ }
+
appendWord(vmime::create <word>
(chunk, charset(charsets::US_ASCII)));
diff --git a/tests/parser/textTest.cpp b/tests/parser/textTest.cpp
index b455d91..c60da5a 100644
--- a/tests/parser/textTest.cpp
+++ b/tests/parser/textTest.cpp
@@ -47,6 +47,9 @@ VMIME_TEST_SUITE_BEGIN
VMIME_TEST(testWordGenerateQuote)
VMIME_TEST(testWordGenerateSpecialCharsets)
VMIME_TEST(testWordGenerateSpecials)
+
+ VMIME_TEST(testWhitespace)
+ VMIME_TEST(testWhitespaceMBox)
VMIME_TEST_LIST_END
@@ -141,9 +144,9 @@ VMIME_TEST_SUITE_BEGIN
t2.createFromString(s2, c2);
VASSERT_EQ("2.1", 3, t2.getWordCount());
- VASSERT_EQ("2.2", "some ASCII characters and special chars:", t2.getWordAt(0)->getBuffer());
+ VASSERT_EQ("2.2", "some ASCII characters and special chars: ", t2.getWordAt(0)->getBuffer());
VASSERT_EQ("2.3", vmime::charset(vmime::charsets::US_ASCII), t2.getWordAt(0)->getCharset());
- VASSERT_EQ("2.4", "\xf1\xf2\xf3\xf4", t2.getWordAt(1)->getBuffer());
+ VASSERT_EQ("2.4", "\xf1\xf2\xf3\xf4 ", t2.getWordAt(1)->getBuffer());
VASSERT_EQ("2.5", c2, t2.getWordAt(1)->getCharset());
VASSERT_EQ("2.6", "and then more ASCII chars.", t2.getWordAt(2)->getBuffer());
VASSERT_EQ("2.7", vmime::charset(vmime::charsets::US_ASCII), t2.getWordAt(2)->getCharset());
@@ -378,5 +381,52 @@ VMIME_TEST_SUITE_BEGIN
vmime::word("\x22\xC3\x9Cml\xC3\xA4ute\x22", vmime::charset("UTF-8")).generate());
}
+ void testWhitespace()
+ {
+ // Create: the ASCII part (keeping its trailing space) and the 8-bit part become two words
+ vmime::text text;
+ text.createFromString("Achim Br\xc3\xa4ndt", vmime::charsets::UTF_8);
+
+ VASSERT_EQ("1", 2, text.getWordCount());
+ VASSERT_EQ("2", "Achim ", text.getWordAt(0)->getBuffer());
+ VASSERT_EQ("3", "us-ascii", text.getWordAt(0)->getCharset());
+ VASSERT_EQ("4", "Br\xc3\xa4ndt", text.getWordAt(1)->getBuffer());
+ VASSERT_EQ("5", "utf-8", text.getWordAt(1)->getCharset());
+
+ // Generate: only the non-ASCII word is expected to be RFC-2047 encoded
+ VASSERT_EQ("6", "Achim =?utf-8?Q?Br=C3=A4ndt?=", text.generate());
+
+ // Parse: the "_" inside the first encoded word must come back as a trailing space
+ text.parse("=?us-ascii?Q?Achim_?= =?utf-8?Q?Br=C3=A4ndt?=");
+
+ VASSERT_EQ("7", 2, text.getWordCount());
+ VASSERT_EQ("8", "Achim ", text.getWordAt(0)->getBuffer());
+ VASSERT_EQ("9", "us-ascii", text.getWordAt(0)->getCharset());
+ VASSERT_EQ("10", "Br\xc3\xa4ndt", text.getWordAt(1)->getBuffer());
+ VASSERT_EQ("11", "utf-8", text.getWordAt(1)->getCharset());
+ }
+
+ void testWhitespaceMBox()
+ {
+ // In a mailbox name the space MUST be encoded inside a word (as "_" in the encoded-word)
+ vmime::mailbox mbox(vmime::text("Achim Br\xc3\xa4ndt", vmime::charsets::UTF_8), "me@vmime.org");
+ VASSERT_EQ("generate1", "=?us-ascii?Q?Achim_?= =?utf-8?Q?Br=C3=A4ndt?= <me@vmime.org>", mbox.generate());
+
+ vmime::text txt;
+ txt.appendWord(vmime::create <vmime::word>("Achim ", "us-ascii"));
+ txt.appendWord(vmime::create <vmime::word>("Br\xc3\xa4ndt", "utf-8"));
+ mbox = vmime::mailbox(txt, "me@vmime.org");
+ VASSERT_EQ("generate2", "=?us-ascii?Q?Achim_?= =?utf-8?Q?Br=C3=A4ndt?= <me@vmime.org>", mbox.generate());
+
+ mbox.parse("=?us-ascii?Q?Achim?= =?utf-8?Q?Br=C3=A4ndt?= <me@vmime.org>");
+ VASSERT_EQ("parse.name.count", 2, mbox.getName().getWordCount());
+ VASSERT_EQ("parse.name.word1.buffer", "Achim", mbox.getName().getWordAt(0)->getBuffer());
+ VASSERT_EQ("parse.name.word1.charset", "us-ascii", mbox.getName().getWordAt(0)->getCharset());
+ VASSERT_EQ("parse.name.word2.buffer", "Br\xc3\xa4ndt", mbox.getName().getWordAt(1)->getBuffer());
+ VASSERT_EQ("parse.name.word2.charset", "utf-8", mbox.getName().getWordAt(1)->getCharset());
+
+ VASSERT_EQ("parse.email", "me@vmime.org", mbox.getEmail());
+ }
+
VMIME_TEST_SUITE_END