24
|
1 #include <stdlib.h> |
|
2 #include <string.h> |
|
3 |
|
4 #include "utf8.h" |
|
5 |
|
6 |
|
/* Desc: convert UTF8 -> 8-bit (ASCII / single-byte) string
 *
 * In  : NUL-terminated UTF8 string (may be NULL)
 * Out : newly allocated single-byte string, or NULL if src is NULL or
 *       the allocation fails
 *
 * Each two-byte UTF8 sequence (lead byte 110xxxxx) is collapsed into one
 * output byte holding the low 8 bits of its code point.  Every other
 * byte, including the bytes of longer sequences, is copied unchanged.
 *
 * Note: it is up to the caller to free the returned string
 */
char *utf8_decode(const char *src)
{
	char *ret;
	char *aux;

	if (src == NULL)
		return NULL;

	/* The output is never longer than the input; calloc() also
	 * supplies the terminating NUL. */
	ret = calloc(1, strlen(src) + 1);
	if (ret == NULL)
		return NULL;
	aux = ret;

	while (*src) {
		unsigned char lead = (unsigned char)*src++;

		/* Only consume a second byte when one exists: a lead byte
		 * sitting right before the terminator must not pull src past
		 * the end of the string. */
		if ((lead & 0xe0) == 0xc0 && *src != '\0') {
			unsigned char ch2 = (unsigned char)*src++;

			*aux++ = (char)(unsigned char)(((lead & 0x1f) << 6) | (ch2 & 0x3f));
		} else {
			/* Plain byte, or a truncated lead byte: pass it through. */
			*aux++ = (char)lead;
		}
	}

	return ret;
}
|
32 |
|
33 |
|
/* Desc: convert 8-bit (ASCII / single-byte) string -> UTF8
 *
 * In  : NUL-terminated single-byte string (may be NULL)
 * Out : newly allocated UTF8 string, or NULL if src is NULL or the
 *       allocation fails
 *
 * Bytes below 0x80 are copied unchanged.  Every byte >= 0x80 is treated
 * as a code point of the same value and written as a two-byte UTF8
 * sequence: 110000xx 10xxxxxx.
 *
 * Note: it is up to the caller to free the returned string
 */
char *utf8_encode(const char *src)
{
	char *ret;
	char *aux;

	if (src == NULL)
		return NULL;

	/* Worst case is two output bytes per input byte plus the NUL.
	 * Letting calloc() do the multiplication gets its overflow check,
	 * and the zeroed buffer supplies the terminator. */
	ret = calloc(strlen(src) + 1, 2);
	if (ret == NULL)
		return NULL;
	aux = ret;

	while (*src) {
		unsigned char ch = (unsigned char)*src++;

		if (ch < 0x80) {
			*aux++ = (char)ch;
		} else {
			/* Lead byte carries the top two bits of the value. */
			*aux++ = (char)(0xc0 | (ch >> 6));
			/* Continuation byte is 10xxxxxx: tag 0x80, not 0xc0. */
			*aux++ = (char)(0x80 | (ch & 0x3f));
		}
	}

	return ret;
}