Skip to content

Commit

Permalink
Add new utf8 test-vector, update comments explaining utf8 decoding
Browse files Browse the repository at this point in the history
Description:
Minor changes to help test and clarify the way utf8 strings are
decoded.  This originated from my misunderstanding of the fix for
issue libtom#507.

Testing:

  $ make clean
  $ make CFLAGS="-DUSE_LTM -DLTM_DESC -I../libtommath" EXTRALIBS="../libtommath/libtommath.a" test
  $ ./test

You can confirm that the new utf8 test data is correct using Python 2
(in Python 3, prefix the literal with b, since str has no decode method):

  >>> s="\xD7\xA9\xD7\x9C\xD7\x95\xD7\x9D"
  >>> s.decode("utf-8")
  u'\u05e9\u05dc\u05d5\u05dd'
  • Loading branch information
jamuir committed Dec 24, 2020
1 parent 954ab9b commit 80f2ff9
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 5 deletions.
12 changes: 7 additions & 5 deletions src/pk/asn1/der/utf8/der_decode_utf8_string.c
Original file line number Diff line number Diff line change
Expand Up @@ -53,21 +53,23 @@ int der_decode_utf8_string(const unsigned char *in, unsigned long inlen,

/* proceed to decode */
for (y = 0; x < inlen; ) {
/* get first byte */
/* read first byte */
tmp = in[x++];

/* count number of bytes */
/* count number of left-shifts needed to get zero in most-sig bit */
for (z = 0; (tmp & 0x80) && (z <= 4); z++, tmp = (tmp << 1) & 0xFF);

/* for valid utf8, z is in {0,2,3,4}.
if z>0, then z-1 equals the number of additional bytes to read */
if (z == 1 || z > 4 || (x + (z - 1) > inlen)) {
return CRYPT_INVALID_PACKET;
}

/* decode, grab upper bits */
/* right-shift tmp to restore least-sig bits */
tmp >>= z;

/* grab remaining bytes */
if (z > 1) { --z; }
if (z > 0) { --z; }
/* read remaining bytes */
while (z-- != 0) {
if ((in[x] & 0xC0) != 0x80) {
return CRYPT_INVALID_PACKET;
Expand Down
20 changes: 20 additions & 0 deletions tests/der_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -1603,6 +1603,8 @@ int der_test(void)
static const unsigned char utf8_1_der[] = { 0x0C, 0x07, 0x41, 0xE2, 0x89, 0xA2, 0xCE, 0x91, 0x2E };
static const wchar_t utf8_2[] = { 0xD55C, 0xAD6D, 0xC5B4 };
static const unsigned char utf8_2_der[] = { 0x0C, 0x09, 0xED, 0x95, 0x9C, 0xEA, 0xB5, 0xAD, 0xEC, 0x96, 0xB4 };
static const wchar_t utf8_3[] = { 0x05E9, 0x05DC, 0x05D5, 0x05DD };
static const unsigned char utf8_3_der[] = { 0x0C, 0x08, 0xD7, 0xA9, 0xD7, 0x9C, 0xD7, 0x95, 0xD7, 0x9D };

unsigned char utf8_buf[32];
wchar_t utf8_out[32];
Expand Down Expand Up @@ -1961,6 +1963,24 @@ tmp_time.off_hh);
return 1;
}

/* encode it */
x = sizeof(utf8_buf);
DO(der_encode_utf8_string(utf8_3, sizeof(utf8_3) / sizeof(utf8_3[0]), utf8_buf, &x));
if (x != sizeof(utf8_3_der) || memcmp(utf8_buf, utf8_3_der, x)) {
fprintf(stderr, "DER UTF8_3 encoded to %lu bytes\n", x);
for (y = 0; y < x; y++) fprintf(stderr, "%02x ", (unsigned)utf8_buf[y]);
fprintf(stderr, "\n");
return 1;
}
/* decode it */
y = sizeof(utf8_out) / sizeof(utf8_out[0]);
DO(der_decode_utf8_string(utf8_buf, x, utf8_out, &y));
if (y != (sizeof(utf8_3) / sizeof(utf8_3[0])) || memcmp(utf8_3, utf8_out, y * sizeof(wchar_t))) {
fprintf(stderr, "DER UTF8_3 decoded to %lu wchar_t\n", y);
for (x = 0; x < y; x++) fprintf(stderr, "%04lx ", (unsigned long)utf8_out[x]);
fprintf(stderr, "\n");
return 1;
}

der_set_test();
der_flexi_test();
Expand Down

0 comments on commit 80f2ff9

Please sign in to comment.