diff --git a/utf8decode.c b/utf8decode.c deleted file mode 100644 index 4e1ff41..0000000 --- a/utf8decode.c +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright (c) 2008-2009 Bjoern Hoehrmann -// See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details. - -#include "utf8decode.h" - -#define UTF8_ACCEPT 0 -#define UTF8_REJECT 1 - -static const uint8_t utf8d[] = { - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 40..5f - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 60..7f - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, // 80..9f - 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, // a0..bf - 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // c0..df - 0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, // e0..ef - 0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, // f0..ff - 0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, // s0..s0 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, // s1..s2 - 1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, // s3..s4 - 1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, // s5..s6 - 1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // s7..s8 -}; - -static uint32_t inline -decode(uint32_t* state, uint32_t* codep, uint32_t byte) { - uint32_t type = utf8d[byte]; - - *codep = (*state != UTF8_ACCEPT) ? - (byte & 0x3fu) | (*codep << 6) : - (0xff >> type) & (byte); - - *state = utf8d[256 + *state*16 + type]; - return *state; -} - -int -countCodePoints(uint8_t* s, size_t len) { - uint32_t codepoint; - uint32_t state = 0; - size_t count = 0; - -// for (count = 0; *s; ++s) - for (int i = len; i != 0; --i) { - //if (!decode(&state, &codepoint, *s)) - ++s; - if (!decode(&state, &codepoint, *s)) { - count += 1; - } - } - - return state != UTF8_ACCEPT; -// return state; -// return count; -} diff --git a/utf8decode.h b/utf8decode.h deleted file mode 100644 index d4f7f40..0000000 --- a/utf8decode.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef UTF8_DECODE_H -#define UTF8_DECODE_H - -#include -#include - -//inline uint32_t decode(uint32_t* state, uint32_t* codep, uint32_t byte); - -extern int countCodePoints(uint8_t* s, size_t count); - -/* typedef struct { - size_t current_index; - size_t total_index; - int state; - int is_valid; - int ends_on_codepoint; -} utf8_validator_t; - -extern void utf8vld_reset (utf8_validator_t* validator); - -extern void utf8vld_validate (utf8_validator_t* validator, const uint8_t* data, size_t offset, size_t length); - -extern int utf8_valid(const uint8_t* data, size_t len); */ - -#endif // UTF8_DECODE_H diff --git a/utf8validator.c b/utf8validator.c deleted file mode 100644 index 6105658..0000000 --- a/utf8validator.c +++ /dev/null @@ -1,64 +0,0 @@ -#include "utf8validator.h" - -static const uint8_t UTF8VALIDATOR_DFA[] = -{ - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 40..5f - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 60..7f - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, // 80..9f - 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, // a0..bf - 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // c0..df - - 0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, // e0..ef - 0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, // f0..ff - 0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, // s0..s0 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, // s1..s2 - 1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, // s3..s4 - 1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, // s5..s6 - 1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1 // s7..s8 -}; - -#define UTF8_ACCEPT 0 -#define UTF8_REJECT 1 - -void utf8vld_reset (utf8_validator_t* validator) { - validator->state = UTF8_ACCEPT; - validator->current_index = 0; - validator->total_index = 0; - validator->is_valid = 1; - validator->ends_on_codepoint = 1; -} - -void utf8vld_validate (utf8_validator_t* validator, const uint8_t* data, size_t offset, size_t length) { - - int state = validator->state; - - for (size_t i = offset; i < length + offset; ++i) { - - state = UTF8VALIDATOR_DFA[256 + (state << 4) + UTF8VALIDATOR_DFA[data[i]]]; - - if (state == UTF8_REJECT) - { - validator->state = state; - validator->current_index = i - offset; - validator->total_index += i - offset; - validator->is_valid = 0; - validator->ends_on_codepoint = 0; - return; - } - } - - validator->state = state; - validator->current_index = length; - validator->total_index += length; - validator->is_valid = 1; - validator->ends_on_codepoint = validator->state == UTF8_ACCEPT; -} - -int utf8_valid(const uint8_t* data, size_t len) { - utf8_validator_t validator; - utf8vld_reset(&validator); - utf8vld_validate(&validator, data, 0, len); - return validator.is_valid; -} diff --git a/utf8validator.h b/utf8validator.h deleted file mode 100644 index 2cf15a3..0000000 --- a/utf8validator.h +++ /dev/null @@ -1,21 +0,0 @@ -#ifndef UTF8_VALIDATOR_H -#define UTF8_VALIDATOR_H - -#include -#include - -typedef struct { - size_t current_index; - size_t total_index; - int state; - int is_valid; - int ends_on_codepoint; -} utf8_validator_t; - -extern void utf8vld_reset (utf8_validator_t* validator); - -extern void utf8vld_validate (utf8_validator_t* validator, const uint8_t* data, size_t offset, size_t length); - -extern int utf8_valid(const uint8_t* data, size_t len); - -#endif // UTF8_VALIDATOR_H