diff --git a/test/unit/utf8.c b/test/unit/utf8.c index a05ac9c4..c58e1529 100644 --- a/test/unit/utf8.c +++ b/test/unit/utf8.c @@ -67,7 +67,7 @@ static void TEST_utf8_boundary_conditions(void) /* Non character */ utf8_helper("2.2.3 3 bytes (U-0000FFFF): \"￿\"", MOSQ_ERR_MALFORMED_UTF8); /* Non character */ - utf8_helper("2.2.4 4 bytes (U-0010FFFF): \"����\"", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper("2.2.4 4 bytes (U-0010FFFF): \"\"", MOSQ_ERR_MALFORMED_UTF8); /* 2.3 Other boundary conditions */ @@ -77,7 +77,7 @@ static void TEST_utf8_boundary_conditions(void) /* Non character */ utf8_helper("2.3.4 U-0010FFFF = f4 8f bf bf = \"􏿿\"", MOSQ_ERR_MALFORMED_UTF8); /* This used to be valid in pre-2003 utf-8 */ - utf8_helper("2.3.5 U-00110000 = f4 90 80 80 = \"����\"", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper("2.3.5 U-00110000 = f4 90 80 80 = \"\"", MOSQ_ERR_MALFORMED_UTF8); } @@ -87,14 +87,14 @@ static void TEST_utf8_malformed_sequences(void) int i; /* 3 Malformed sequences */ /* 3.1 Unexpected continuation bytes */ - utf8_helper("3.1.1 First continuation byte 0x80: \"�\"", MOSQ_ERR_MALFORMED_UTF8); - utf8_helper("3.1.2 Last continuation byte 0xbf: \"�\"", MOSQ_ERR_MALFORMED_UTF8); - utf8_helper("3.1.3 2 continuation bytes: \"��\"", MOSQ_ERR_MALFORMED_UTF8); - utf8_helper("3.1.4 3 continuation bytes: \"���\"", MOSQ_ERR_MALFORMED_UTF8); - utf8_helper("3.1.5 4 continuation bytes: \"����\"", MOSQ_ERR_MALFORMED_UTF8); - utf8_helper("3.1.6 5 continuation bytes: \"�����\"", MOSQ_ERR_MALFORMED_UTF8); - utf8_helper("3.1.7 6 continuation bytes: \"������\"", MOSQ_ERR_MALFORMED_UTF8); - utf8_helper("3.1.8 7 continuation bytes: \"�������\"", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper("3.1.1 First continuation byte 0x80: \"\"", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper("3.1.2 Last continuation byte 0xbf: \"\"", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper("3.1.3 2 continuation bytes: \"\"", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper("3.1.4 3 continuation bytes: \"\"", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper("3.1.5 4 continuation bytes: \"\"", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper("3.1.6 5 continuation bytes: \"\"", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper("3.1.7 6 continuation bytes: \"\"", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper("3.1.8 7 continuation bytes: \"\"", MOSQ_ERR_MALFORMED_UTF8); /* 3.1.9 Sequence of all 64 possible continuation bytes (0x80-0xbf): */ memset(buf, 0, sizeof(buf)); @@ -124,7 +124,7 @@ static void TEST_utf8_malformed_sequences(void) /* 3.2.1 All 32 first bytes of 2-byte sequences (0xc0-0xdf), each followed by a space character: */ - utf8_helper("� � � � � � � � � � � � � � � � � � � � � � � � � � � � � � � � ", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper(" ", MOSQ_ERR_MALFORMED_UTF8); for(i=0xC0; i<0xE0; i++){ buf[0] = (uint8_t)i; buf[1] = ' '; @@ -134,7 +134,7 @@ static void TEST_utf8_malformed_sequences(void) /* 3.2.2 All 16 first bytes of 3-byte sequences (0xe0-0xef), each followed by a space character: */ - utf8_helper("\"� � � � � � � � � � � � � � � � \"", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper("\" \"", MOSQ_ERR_MALFORMED_UTF8); for(i=0xe0; i<0xf0; i++){ buf[0] = (uint8_t)i; buf[1] = ' '; @@ -144,7 +144,7 @@ static void TEST_utf8_malformed_sequences(void) /* 3.2.3 All 8 first bytes of 4-byte sequences (0xf0-0xf7), each followed by a space character: */ - utf8_helper("\"� � � � � � � � \"", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper("\" \"", MOSQ_ERR_MALFORMED_UTF8); for(i=0xF0; i<0xF8; i++){ buf[0] = (uint8_t)i; buf[1] = ' '; @@ -154,7 +154,7 @@ static void TEST_utf8_malformed_sequences(void) /* 3.2.4 All 4 first bytes of 5-byte sequences (0xf8-0xfb), each followed by a space character: */ - utf8_helper("\"� � � � \"", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper("\" \"", MOSQ_ERR_MALFORMED_UTF8); for(i=0xF8; i<0xFC; i++){ buf[0] = (uint8_t)i; buf[1] = ' '; @@ -164,9 +164,9 @@ static void TEST_utf8_malformed_sequences(void) /* 3.2.5 All 2 first bytes of 6-byte sequences (0xfc-0xfd), each followed by a space character: */ - utf8_helper("\"� � \"", MOSQ_ERR_MALFORMED_UTF8); - utf8_helper("� ", MOSQ_ERR_MALFORMED_UTF8); - utf8_helper("� ", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper("\" \"", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper(" ", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper(" ", MOSQ_ERR_MALFORMED_UTF8); for(i=0xFC; i<0xFE; i++){ buf[0] = (uint8_t)i; buf[1] = ' '; @@ -180,31 +180,31 @@ static void TEST_utf8_malformed_sequences(void) malformed sequence, i.e., you should see only a single replacement character in each of the next 10 tests. (Characters as in section 2) */ - utf8_helper("3.3.1 2-byte sequence with last byte missing (U+0000): \"�\"", MOSQ_ERR_MALFORMED_UTF8); - utf8_helper("3.3.2 3-byte sequence with last byte missing (U+0000): \"��\"", MOSQ_ERR_MALFORMED_UTF8); - utf8_helper("3.3.3 4-byte sequence with last byte missing (U+0000): \"���\"", MOSQ_ERR_MALFORMED_UTF8); - utf8_helper("3.3.4 5-byte sequence with last byte missing (U+0000): \"����\"", MOSQ_ERR_MALFORMED_UTF8); - utf8_helper("3.3.5 6-byte sequence with last byte missing (U+0000): \"�����\"", MOSQ_ERR_MALFORMED_UTF8); - utf8_helper("3.3.6 2-byte sequence with last byte missing (U-000007FF): \"�\"", MOSQ_ERR_MALFORMED_UTF8); - utf8_helper("3.3.7 3-byte sequence with last byte missing (U-0000FFFF): \"�\"", MOSQ_ERR_MALFORMED_UTF8); - utf8_helper("3.3.8 4-byte sequence with last byte missing (U-001FFFFF): \"���\"", MOSQ_ERR_MALFORMED_UTF8); - utf8_helper("3.3.9 5-byte sequence with last byte missing (U-03FFFFFF): \"����\"", MOSQ_ERR_MALFORMED_UTF8); - utf8_helper("3.3.10 6-byte sequence with last byte missing (U-7FFFFFFF): \"�����\"", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper("3.3.1 2-byte sequence with last byte missing (U+0000): \"\"", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper("3.3.2 3-byte sequence with last byte missing (U+0000): \"\"", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper("3.3.3 4-byte sequence with last byte missing (U+0000): \"\"", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper("3.3.4 5-byte sequence with last byte missing (U+0000): \"\"", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper("3.3.5 6-byte sequence with last byte missing (U+0000): \"\"", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper("3.3.6 2-byte sequence with last byte missing (U-000007FF): \"\"", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper("3.3.7 3-byte sequence with last byte missing (U-0000FFFF): \"\"", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper("3.3.8 4-byte sequence with last byte missing (U-001FFFFF): \"\"", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper("3.3.9 5-byte sequence with last byte missing (U-03FFFFFF): \"\"", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper("3.3.10 6-byte sequence with last byte missing (U-7FFFFFFF): \"\"", MOSQ_ERR_MALFORMED_UTF8); /* 3.4 Concatenation of incomplete sequences All the 10 sequences of 3.3 concatenated, you should see 10 malformed sequences being signalled:*/ - utf8_helper("\"�����������������������������\"", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper("\"\"", MOSQ_ERR_MALFORMED_UTF8); /* 3.5 Impossible bytes The following two bytes cannot appear in a correct UTF-8 string */ - utf8_helper("3.5.1 fe = \"�\"", MOSQ_ERR_MALFORMED_UTF8); - utf8_helper("3.5.2 ff = \"�\"", MOSQ_ERR_MALFORMED_UTF8); - utf8_helper("3.5.3 fe fe ff ff = \"����\"", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper("3.5.1 fe = \"\"", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper("3.5.2 ff = \"\"", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper("3.5.3 fe fe ff ff = \"\"", MOSQ_ERR_MALFORMED_UTF8); } static void TEST_utf8_overlong_encoding(void) @@ -237,11 +237,11 @@ static void TEST_utf8_overlong_encoding(void) a replacement character. If you see a slash below, you do not have a safe UTF-8 decoder! */ - utf8_helper("4.1.1 U+002F = c0 af = \"��\"", MOSQ_ERR_MALFORMED_UTF8); - utf8_helper("4.1.2 U+002F = e0 80 af = \"���\"", MOSQ_ERR_MALFORMED_UTF8); - utf8_helper("4.1.3 U+002F = f0 80 80 af = \"����\"", MOSQ_ERR_MALFORMED_UTF8); - utf8_helper("4.1.4 U+002F = f8 80 80 80 af = \"�����\"", MOSQ_ERR_MALFORMED_UTF8); - utf8_helper("4.1.5 U+002F = fc 80 80 80 80 af = \"������\"", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper("4.1.1 U+002F = c0 af = \"\"", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper("4.1.2 U+002F = e0 80 af = \"\"", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper("4.1.3 U+002F = f0 80 80 af = \"\"", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper("4.1.4 U+002F = f8 80 80 80 af = \"\"", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper("4.1.5 U+002F = fc 80 80 80 80 af = \"\"", MOSQ_ERR_MALFORMED_UTF8); /* 4.2 Maximum overlong sequences @@ -250,11 +250,11 @@ static void TEST_utf8_overlong_encoding(void) is a boundary test for safe UTF-8 decoders. All five characters should be rejected like malformed UTF-8 sequences. */ - utf8_helper("4.2.1 U-0000007F = c1 bf = \"��\"", MOSQ_ERR_MALFORMED_UTF8); - utf8_helper("4.2.2 U-000007FF = e0 9f bf = \"���\"", MOSQ_ERR_MALFORMED_UTF8); - utf8_helper("4.2.3 U-0000FFFF = f0 8f bf bf = \"����\"", MOSQ_ERR_MALFORMED_UTF8); - utf8_helper("4.2.4 U-001FFFFF = f8 87 bf bf bf = \"�����\"", MOSQ_ERR_MALFORMED_UTF8); - utf8_helper("4.2.5 U-03FFFFFF = fc 83 bf bf bf bf = \"������\"", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper("4.2.1 U-0000007F = c1 bf = \"\"", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper("4.2.2 U-000007FF = e0 9f bf = \"\"", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper("4.2.3 U-0000FFFF = f0 8f bf bf = \"\"", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper("4.2.4 U-001FFFFF = f8 87 bf bf bf = \"\"", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper("4.2.5 U-03FFFFFF = fc 83 bf bf bf bf = \"\"", MOSQ_ERR_MALFORMED_UTF8); /* 4.3 Overlong representation of the NUL character @@ -262,11 +262,11 @@ static void TEST_utf8_overlong_encoding(void) UTF-8 sequences and should not be treated like the ASCII NUL character. */ - utf8_helper("4.3.1 U+0000 = c0 80 = \"��\"", MOSQ_ERR_MALFORMED_UTF8); - utf8_helper("4.3.2 U+0000 = e0 80 80 = \"���\"", MOSQ_ERR_MALFORMED_UTF8); - utf8_helper("4.3.3 U+0000 = f0 80 80 80 = \"����\"", MOSQ_ERR_MALFORMED_UTF8); - utf8_helper("4.3.4 U+0000 = f8 80 80 80 80 = \"�����\"", MOSQ_ERR_MALFORMED_UTF8); - utf8_helper("4.3.5 U+0000 = fc 80 80 80 80 80 = \"������\"", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper("4.3.1 U+0000 = c0 80 = \"\"", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper("4.3.2 U+0000 = e0 80 80 = \"\"", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper("4.3.3 U+0000 = f0 80 80 80 = \"\"", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper("4.3.4 U+0000 = f8 80 80 80 80 = \"\"", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper("4.3.5 U+0000 = fc 80 80 80 80 80 = \"\"", MOSQ_ERR_MALFORMED_UTF8); } @@ -281,24 +281,24 @@ static void TEST_utf8_illegal_code_positions(void) /* 5.1 Single UTF-16 surrogates */ - utf8_helper("5.1.1 U+D800 = ed a0 80 = \"���\"", MOSQ_ERR_MALFORMED_UTF8); - utf8_helper("5.1.2 U+DB7F = ed ad bf = \"���\"", MOSQ_ERR_MALFORMED_UTF8); - utf8_helper("5.1.3 U+DB80 = ed ae 80 = \"���\"", MOSQ_ERR_MALFORMED_UTF8); - utf8_helper("5.1.4 U+DBFF = ed af bf = \"���\"", MOSQ_ERR_MALFORMED_UTF8); - utf8_helper("5.1.5 U+DC00 = ed b0 80 = \"���\"", MOSQ_ERR_MALFORMED_UTF8); - utf8_helper("5.1.6 U+DF80 = ed be 80 = \"���\"", MOSQ_ERR_MALFORMED_UTF8); - utf8_helper("5.1.7 U+DFFF = ed bf bf = \"���\"", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper("5.1.1 U+D800 = ed a0 80 = \"\"", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper("5.1.2 U+DB7F = ed ad bf = \"\"", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper("5.1.3 U+DB80 = ed ae 80 = \"\"", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper("5.1.4 U+DBFF = ed af bf = \"\"", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper("5.1.5 U+DC00 = ed b0 80 = \"\"", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper("5.1.6 U+DF80 = ed be 80 = \"\"", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper("5.1.7 U+DFFF = ed bf bf = \"\"", MOSQ_ERR_MALFORMED_UTF8); /* 5.2 Paired UTF-16 surrogates */ - utf8_helper("5.2.1 U+D800 U+DC00 = ed a0 80 ed b0 80 = \"������\"", MOSQ_ERR_MALFORMED_UTF8); - utf8_helper("5.2.2 U+D800 U+DFFF = ed a0 80 ed bf bf = \"������\"", MOSQ_ERR_MALFORMED_UTF8); - utf8_helper("5.2.3 U+DB7F U+DC00 = ed ad bf ed b0 80 = \"������\"", MOSQ_ERR_MALFORMED_UTF8); - utf8_helper("5.2.4 U+DB7F U+DFFF = ed ad bf ed bf bf = \"������\"", MOSQ_ERR_MALFORMED_UTF8); - utf8_helper("5.2.5 U+DB80 U+DC00 = ed ae 80 ed b0 80 = \"������\"", MOSQ_ERR_MALFORMED_UTF8); - utf8_helper("5.2.6 U+DB80 U+DFFF = ed ae 80 ed bf bf = \"������\"", MOSQ_ERR_MALFORMED_UTF8); - utf8_helper("5.2.7 U+DBFF U+DC00 = ed af bf ed b0 80 = \"������\"", MOSQ_ERR_MALFORMED_UTF8); - utf8_helper("5.2.8 U+DBFF U+DFFF = ed af bf ed bf bf = \"������\"", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper("5.2.1 U+D800 U+DC00 = ed a0 80 ed b0 80 = \"\"", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper("5.2.2 U+D800 U+DFFF = ed a0 80 ed bf bf = \"\"", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper("5.2.3 U+DB7F U+DC00 = ed ad bf ed b0 80 = \"\"", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper("5.2.4 U+DB7F U+DFFF = ed ad bf ed bf bf = \"\"", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper("5.2.5 U+DB80 U+DC00 = ed ae 80 ed b0 80 = \"\"", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper("5.2.6 U+DB80 U+DFFF = ed ae 80 ed bf bf = \"\"", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper("5.2.7 U+DBFF U+DC00 = ed af bf ed b0 80 = \"\"", MOSQ_ERR_MALFORMED_UTF8); + utf8_helper("5.2.8 U+DBFF U+DFFF = ed af bf ed bf bf = \"\"", MOSQ_ERR_MALFORMED_UTF8); /* 5.3 Noncharacter code positions