/*
 * Title:   A not-very-polished set of C functions for reading/writing UTF-8
 * Source:  Felix021
 * Time:    Thu, 30 Sep 2010 14:08:08 +0000
 * Author:  felix021
 * URL:     https://www.felix021.com/blog/read.php?1936
 *
 * Content:
 *   Written off the cuff after reading Baidu Baike's description of UTF-8;
 *   it basically works. The odd part is that UTF8getchar was originally
 *   going to use strncpy, but that function has pitfalls (it does not
 *   guarantee NUL termination), so the bytes are copied by hand instead.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

/* Allocate an array of `size` objects of `type`; the result may be NULL. */
#define alloc(type, size) ((type *)malloc(sizeof(type) * (size)))

/*
 * One encoded character: `length` is the byte count announced by the lead
 * byte, `data` holds the bytes followed by a NUL terminator (at most 6 + 1).
 */
struct UTF8char
{
    unsigned short length;
    char data[7];
};

/* The code below uses the bare name, which plain C only allows via typedef. */
typedef struct UTF8char UTF8char;

/*
 * Return the number of bytes the sequence starting at s[0] claims to occupy,
 * judged from the lead byte alone (legacy 5- and 6-byte forms included).
 * Bytes that cannot start a sequence -- stray continuation bytes
 * (0x80..0xBF) and 0xFE/0xFF -- are reported as length 1 so that a caller
 * can step over them one byte at a time.
 */
unsigned UTF8getcharlen(const char *s)
{
    unsigned char t = (unsigned char) s[0];

    if (t < 0x80)       /* 0xxx xxxx */
        return 1;
    else if (t < 0xc0)  /* 10xx xxxx: continuation byte, not a lead byte */
        return 1;
    else if (t < 0xe0)  /* 110x xxxx */
        return 2;
    else if (t < 0xf0)  /* 1110 xxxx */
        return 3;
    else if (t < 0xf8)  /* 1111 0xxx */
        return 4;
    else if (t < 0xfc)  /* 1111 10xx */
        return 5;
    else if (t < 0xfe)  /* 1111 110x */
        return 6;
    else                /* 0xfe, 0xff */
        return 1;
}

/*
 * Copy the first encoded character of the NUL-terminated string `s` into
 * `*c`.
 *
 * c->length is always set to the length announced by the lead byte, and
 * c->data is always NUL-terminated. Copying stops early at the end of the
 * string or at a trailing byte that is not of the form 10xx xxxx, so a
 * malformed sequence never swallows the character that follows it.
 *
 * Returns non-zero when a complete sequence was copied, 0 otherwise
 * (including for the empty string).
 */
int UTF8getchar(UTF8char *c, const char *s)
{
    unsigned short i;

    c->length = (unsigned short) UTF8getcharlen(s);
    for (i = 0; i < c->length; i++) {
        unsigned char b = (unsigned char) s[i];

        if (b == 0)
            break;                          /* string ended mid-sequence */
        if (i > 0 && (b & 0xc0) != 0x80)
            break;                          /* not a continuation byte */
        c->data[i] = s[i];
    }
    c->data[i] = '\0';
    return (i == c->length);
}

/*
 * Compare the bytes of `c` with the start of `s` using strncmp over
 * c->length bytes; returns 0 when `s` begins with the character in `c`.
 */
int UTF8cmp(const UTF8char *c, const char *s)
{
    return strncmp(c->data, s, c->length);
}

/*
 * Demo driver: read one whitespace-delimited token from stdin and print the
 * byte length and the bytes of its first character. Define
 * UTF8_NO_DEMO_MAIN to leave it out when reusing the functions elsewhere.
 */
#ifndef UTF8_NO_DEMO_MAIN
int main(void)
{
    char linebuf[4096];
    UTF8char c;

    /* The field width keeps scanf inside linebuf (4095 bytes + NUL). */
    if (scanf("%4095s", linebuf) != 1) {
        fprintf(stderr, "failed to read input\n");
        return EXIT_FAILURE;
    }

    /* Explicit check instead of assert(), which vanishes under NDEBUG. */
    int reti = UTF8getchar(&c, linebuf);
    if (reti == 0) {
        fprintf(stderr, "incomplete or malformed UTF-8 sequence\n");
        return EXIT_FAILURE;
    }

    printf("%d %s\n", c.length, c.data);
    return 0;
}
#endif /* UTF8_NO_DEMO_MAIN */

/* Generated by Bo-blog 2.1.0 */