Fix #7: toml_utf8_to_ucs() returns incorrect results

This commit is contained in:
CK Tan 2018-06-08 14:41:44 -07:00
parent 624013252b
commit 56c42b7aed
3 changed files with 77 additions and 7 deletions

16
toml.c
View File

@ -71,9 +71,11 @@ int toml_utf8_to_ucs(const char* orig, int len, int64_t* ret)
if (0x6 == (i >> 5)) {
if (len < 2) return -1;
v = i & 0x1f;
i = *(++buf);
if (0x2 != (i >> 6)) return -1;
v = (v << 6) | (i & 0x3f);
for (int j = 0; j < 1; j++) {
i = *buf++;
if (0x2 != (i >> 6)) return -1;
v = (v << 6) | (i & 0x3f);
}
return *ret = v, (const char*) buf - orig;
}
@ -84,7 +86,7 @@ int toml_utf8_to_ucs(const char* orig, int len, int64_t* ret)
if (len < 3) return -1;
v = i & 0x0F;
for (int j = 0; j < 2; j++) {
i = *(++buf);
i = *buf++;
if (0x2 != (i >> 6)) return -1;
v = (v << 6) | (i & 0x3f);
}
@ -98,7 +100,7 @@ int toml_utf8_to_ucs(const char* orig, int len, int64_t* ret)
if (len < 4) return -1;
v = i & 0x07;
for (int j = 0; j < 3; j++) {
i = *(++buf);
i = *buf++;
if (0x2 != (i >> 6)) return -1;
v = (v << 6) | (i & 0x3f);
}
@ -112,7 +114,7 @@ int toml_utf8_to_ucs(const char* orig, int len, int64_t* ret)
if (len < 5) return -1;
v = i & 0x03;
for (int j = 0; j < 4; j++) {
i = *(++buf);
i = *buf++;
if (0x2 != (i >> 6)) return -1;
v = (v << 6) | (i & 0x3f);
}
@ -126,7 +128,7 @@ int toml_utf8_to_ucs(const char* orig, int len, int64_t* ret)
if (len < 6) return -1;
v = i & 0x01;
for (int j = 0; j < 5; j++) {
i = *(++buf);
i = *buf++;
if (0x2 != (i >> 6)) return -1;
v = (v << 6) | (i & 0x3f);
}

11
unittest/Makefile Normal file
View File

@ -0,0 +1,11 @@
# Build and clean rules for the unit tests.
# -g keeps debug info; -I.. adds the parent directory to the include path.
CFLAGS = -g -I..
TESTS = t1

# 'all' and 'clean' name actions, not files; never let a stray file shadow them.
.PHONY: all clean

all: $(TESTS)

# No recipe here: make's built-in "%: %.c" rule compiles and links every
# listed prerequisite into the t1 executable using CFLAGS.
t1: t1.c ../toml.c

clean:
	rm -f $(TESTS)

57
unittest/t1.c Normal file
View File

@ -0,0 +1,57 @@
#include <stdio.h>
#include <assert.h>
#include <stdint.h>
#include <string.h>
#include "../toml.h"
/*
 * Round-trip unit test for toml_ucs_to_utf8() and toml_utf8_to_ucs().
 *
 * For a low and a high code point of every encoded length from 2 to 6 bytes,
 * verifies that
 *   - toml_ucs_to_utf8() returns the expected length and produces the
 *     expected bytes, and
 *   - toml_utf8_to_ucs() consumes the same number of bytes and yields the
 *     original code point.
 *
 * The checks are explicit comparisons rather than assert() calls so they are
 * still executed when the file is compiled with -DNDEBUG.  Every failing case
 * is reported on stderr instead of stopping at the first one.
 *
 * Returns 0 when all cases pass, 1 otherwise.
 */
int main(void)
{
    static const struct {
        int64_t     code;   /* code point to encode */
        int         size;   /* expected encoded length in bytes */
        const char *utf8;   /* expected encoded bytes */
    } cases[] = {
        { 0x80,       2, "\xc2\x80" },
        { 0x7ff,      2, "\xdf\xbf" },
        { 0x800,      3, "\xe0\xa0\x80" },
        { 0xfffd,     3, "\xef\xbf\xbd" },
        { 0x10000,    4, "\xf0\x90\x80\x80" },
        { 0x1fffff,   4, "\xf7\xbf\xbf\xbf" },
        { 0x200000,   5, "\xf8\x88\x80\x80\x80" },
        { 0x3ffffff,  5, "\xfb\xbf\xbf\xbf\xbf" },
        { 0x4000000,  6, "\xfc\x84\x80\x80\x80\x80" },
        { 0x7fffffff, 6, "\xfd\xbf\xbf\xbf\xbf\xbf" },
    };
    int failures = 0;

    for (size_t i = 0; i < sizeof cases / sizeof cases[0]; i++) {
        const int64_t want_code = cases[i].code;
        const int     want_size = cases[i].size;
        const char   *want_utf8 = cases[i].utf8;

        /* Encode: length and bytes must both match the reference. */
        char buf[6] = {0};
        int n = toml_ucs_to_utf8(want_code, buf);
        if (n != want_size || 0 != memcmp(buf, want_utf8, (size_t) want_size)) {
            fprintf(stderr,
                    "FAIL: toml_ucs_to_utf8(0x%llx) returned %d, "
                    "expected the %d-byte reference encoding\n",
                    (unsigned long long) want_code, n, want_size);
            failures++;
        }

        /* Decode the reference bytes (not the encoder's output) so that the
         * decoder check does not depend on the encoder being correct. */
        int64_t code = -1;
        n = toml_utf8_to_ucs(want_utf8, want_size, &code);
        if (n != want_size || code != want_code) {
            fprintf(stderr,
                    "FAIL: toml_utf8_to_ucs() returned %d with code 0x%llx, "
                    "expected %d with code 0x%llx\n",
                    n, (unsigned long long) code,
                    want_size, (unsigned long long) want_code);
            failures++;
        }
    }

    return failures ? 1 : 0;
}