Sep
30
一套不是很好用的 C 语言 UTF-8 读写函数
看了下百度百科对UTF-8的说明,随手写的,基本能用。
比较诡异的是,本来 UTF8getchar 想用 strncpy 实现,但是这个函数有坑:当源串长度不小于 n 时,它不会在目标串末尾补 '\0'。
转载请注明出自 https://www.felix021.com/blog/ ,如是转载文则注明原出处,谢谢:)
RSS订阅地址: https://www.felix021.com/blog/feed.php 。
比较诡异的是,本来 UTF8getchar 想用 strncpy 实现,但是这个函数有坑:当源串长度不小于 n 时,它不会在目标串末尾补 '\0'。
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
/*
 * Allocate an uninitialised array of `size` objects of `type` with malloc and
 * return it as a `type *` (NULL when malloc fails).
 * `size` is parenthesised so that an expression argument such as `n + 1` is
 * multiplied as a whole instead of expanding to `sizeof(type) * n + 1`.
 */
#define alloc(type, size) ((type *)malloc(sizeof(type) * (size)))
/*
 * One UTF-8 encoded character stored as a small NUL-terminated byte string.
 * The typedef lets the rest of the file refer to the type as plain `UTF8char`,
 * which C (unlike C++) does not allow for a bare struct tag.
 */
typedef struct UTF8char
{
    unsigned short length; // number of bytes the lead byte announces (1-6)
    char data[7];          // up to 6 encoded bytes followed by a terminating 0
} UTF8char;
/*
 * Return the byte length of the UTF-8 sequence starting at s[0], judged from
 * the lead byte alone; the following bytes are not inspected.
 *
 *   0x00-0x7F -> 1      0xF0-0xF7 -> 4
 *   0xC0-0xDF -> 2      0xF8-0xFB -> 5
 *   0xE0-0xEF -> 3      0xFC-0xFD -> 6
 *
 * Bytes that cannot start a sequence (continuation bytes 0x80-0xBF and
 * 0xFE/0xFF) yield 1, so a caller stepping through a buffer always advances
 * by one byte and can resynchronise on the next lead byte.
 */
unsigned UTF8getcharlen(const char *s)
{
    unsigned char lead = (unsigned char) s[0];

    if (lead < 0x80)      // 0xxx xxxx: single-byte (ASCII)
        return 1;
    if (lead < 0xc0)      // 10xx xxxx: stray continuation byte, not a lead
        return 1;
    if (lead < 0xe0)      // 110x xxxx
        return 2;
    if (lead < 0xf0)      // 1110 xxxx
        return 3;
    if (lead < 0xf8)      // 1111 0xxx
        return 4;
    if (lead < 0xfc)      // 1111 10xx
        return 5;
    if (lead < 0xfe)      // 1111 110x
        return 6;
    return 1;             // 0xfe / 0xff never appear in UTF-8
}
/*
 * Copy the first UTF-8 character of s into c.
 * c->length is set to the length reported by UTF8getcharlen(s); up to that
 * many bytes are copied into c->data, stopping early at a 0 byte in s, and
 * c->data is then 0-terminated.
 * Returns 1 when all c->length bytes were copied, 0 when s ended first.
 */
int UTF8getchar(UTF8char *c, const char *s)
{
    int copied = 0;

    c->length = UTF8getcharlen(s);
    while (copied < c->length) {
        if (s[copied] == 0)
            break; // input ended before the announced length was reached
        c->data[copied] = s[copied];
        copied++;
    }
    c->data[copied] = 0;

    return copied == c->length;
}
/*
 * Compare the character stored in c with the start of s.
 * At most c->length bytes are compared with strncmp, whose result
 * (negative, zero or positive) is returned unchanged.
 */
int UTF8cmp(const UTF8char *c, const char *s)
{
    const char *stored = c->data;
    size_t nbytes = c->length;

    return strncmp(stored, s, nbytes);
}
/*
 * Demo driver: read one whitespace-delimited token from stdin, extract its
 * first UTF-8 character and print that character's byte length and bytes.
 * Returns 0 on success and EXIT_FAILURE when no token could be read or the
 * first character of the token is cut short.
 */
int main(void)
{
    char linebuf[4096];
    UTF8char c;

    // Limit the conversion to 4095 bytes so the token plus its terminating 0
    // always fits in linebuf, and reject EOF / read errors instead of using
    // an uninitialised buffer.
    if (scanf("%4095s", linebuf) != 1) {
        fprintf(stderr, "failed to read input\n");
        return EXIT_FAILURE;
    }

    // Truncated input is a runtime condition, so it is checked explicitly
    // rather than with assert(), which is compiled out under NDEBUG.
    int reti = UTF8getchar(&c, linebuf);
    if (reti == 0) {
        fprintf(stderr, "truncated UTF-8 sequence\n");
        return EXIT_FAILURE;
    }

    printf("%d %s\n", c.length, c.data);
    return 0;
}
#include <stdlib.h>
#include <string.h>
#include <assert.h>
/*
 * Allocate an uninitialised array of `size` objects of `type` with malloc and
 * return it as a `type *` (NULL when malloc fails).
 * `size` is parenthesised so that an expression argument such as `n + 1` is
 * multiplied as a whole instead of expanding to `sizeof(type) * n + 1`.
 */
#define alloc(type, size) ((type *)malloc(sizeof(type) * (size)))
/*
 * One UTF-8 encoded character stored as a small NUL-terminated byte string.
 * The typedef lets the rest of the file refer to the type as plain `UTF8char`,
 * which C (unlike C++) does not allow for a bare struct tag.
 */
typedef struct UTF8char
{
    unsigned short length; // number of bytes the lead byte announces (1-6)
    char data[7];          // up to 6 encoded bytes followed by a terminating 0
} UTF8char;
/*
 * Return the byte length of the UTF-8 sequence starting at s[0], judged from
 * the lead byte alone; the following bytes are not inspected.
 *
 *   0x00-0x7F -> 1      0xF0-0xF7 -> 4
 *   0xC0-0xDF -> 2      0xF8-0xFB -> 5
 *   0xE0-0xEF -> 3      0xFC-0xFD -> 6
 *
 * Bytes that cannot start a sequence (continuation bytes 0x80-0xBF and
 * 0xFE/0xFF) yield 1, so a caller stepping through a buffer always advances
 * by one byte and can resynchronise on the next lead byte.
 */
unsigned UTF8getcharlen(const char *s)
{
    unsigned char lead = (unsigned char) s[0];

    if (lead < 0x80)      // 0xxx xxxx: single-byte (ASCII)
        return 1;
    if (lead < 0xc0)      // 10xx xxxx: stray continuation byte, not a lead
        return 1;
    if (lead < 0xe0)      // 110x xxxx
        return 2;
    if (lead < 0xf0)      // 1110 xxxx
        return 3;
    if (lead < 0xf8)      // 1111 0xxx
        return 4;
    if (lead < 0xfc)      // 1111 10xx
        return 5;
    if (lead < 0xfe)      // 1111 110x
        return 6;
    return 1;             // 0xfe / 0xff never appear in UTF-8
}
/*
 * Copy the first UTF-8 character of s into c.
 * c->length is set to the length reported by UTF8getcharlen(s); up to that
 * many bytes are copied into c->data, stopping early at a 0 byte in s, and
 * c->data is then 0-terminated.
 * Returns 1 when all c->length bytes were copied, 0 when s ended first.
 */
int UTF8getchar(UTF8char *c, const char *s)
{
    int copied = 0;

    c->length = UTF8getcharlen(s);
    while (copied < c->length) {
        if (s[copied] == 0)
            break; // input ended before the announced length was reached
        c->data[copied] = s[copied];
        copied++;
    }
    c->data[copied] = 0;

    return copied == c->length;
}
/*
 * Compare the character stored in c with the start of s.
 * At most c->length bytes are compared with strncmp, whose result
 * (negative, zero or positive) is returned unchanged.
 */
int UTF8cmp(const UTF8char *c, const char *s)
{
    const char *stored = c->data;
    size_t nbytes = c->length;

    return strncmp(stored, s, nbytes);
}
/*
 * Demo driver: read one whitespace-delimited token from stdin, extract its
 * first UTF-8 character and print that character's byte length and bytes.
 * Returns 0 on success and EXIT_FAILURE when no token could be read or the
 * first character of the token is cut short.
 */
int main(void)
{
    char linebuf[4096];
    UTF8char c;

    // Limit the conversion to 4095 bytes so the token plus its terminating 0
    // always fits in linebuf, and reject EOF / read errors instead of using
    // an uninitialised buffer.
    if (scanf("%4095s", linebuf) != 1) {
        fprintf(stderr, "failed to read input\n");
        return EXIT_FAILURE;
    }

    // Truncated input is a runtime condition, so it is checked explicitly
    // rather than with assert(), which is compiled out under NDEBUG.
    int reti = UTF8getchar(&c, linebuf);
    if (reti == 0) {
        fprintf(stderr, "truncated UTF-8 sequence\n");
        return EXIT_FAILURE;
    }

    printf("%d %s\n", c.length, c.data);
    return 0;
}
欢迎扫码关注:
转载请注明出自 https://www.felix021.com/blog/ ,如是转载文则注明原出处,谢谢:)
RSS订阅地址: https://www.felix021.com/blog/feed.php 。