annotate mcabber/src/utf8.c @ 49:18a03a69f5e4

[/trunk] Changeset 65 by mikael * Same change to utf_decode (useless I think... but maybe cleaner).
author mikael
date Wed, 06 Apr 2005 10:23:45 +0000
parents f937475e9baa
children 1bc374915787 33b8e801ffa6
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
24
e88b15cbf2de [/trunk] Changeset 40 by mikael
mikael
parents:
diff changeset
1 #include <stdlib.h>
e88b15cbf2de [/trunk] Changeset 40 by mikael
mikael
parents:
diff changeset
2 #include <string.h>
e88b15cbf2de [/trunk] Changeset 40 by mikael
mikael
parents:
diff changeset
3
e88b15cbf2de [/trunk] Changeset 40 by mikael
mikael
parents:
diff changeset
4 #include "utf8.h"
e88b15cbf2de [/trunk] Changeset 40 by mikael
mikael
parents:
diff changeset
5
e88b15cbf2de [/trunk] Changeset 40 by mikael
mikael
parents:
diff changeset
6
e88b15cbf2de [/trunk] Changeset 40 by mikael
mikael
parents:
diff changeset
/* Desc: convert UTF8 -> 8-bit string (ISO-8859-1 / Latin-1 range)
 *
 * In  : UTF8 string (NUL-terminated); may be NULL
 * Out : newly allocated 8-bit string, or NULL if src is NULL or the
 *       allocation fails
 *
 * Two-byte sequences (lead byte 110xxxxx) are folded into a single output
 * byte; every other byte is copied through unchanged.  Code points above
 * 0xFF do not fit in one byte and lose their high bits.
 *
 * Note: it is up to the caller to free the returned string
 */
char *utf8_decode(const char *src)
{
  char *ret;
  unsigned char *aux;

  if (!src) {
    return NULL;
  }

  /* The decoded string is never longer than the input; calloc() zero-fills
   * the buffer, so the result is NUL-terminated without further work. */
  ret = calloc(1, strlen(src) + 1);
  if (!ret) {
    return NULL;
  }
  aux = (unsigned char*)ret;

  while (*src) {
    unsigned char lead = (unsigned char)*src++;
    /* Only treat lead as the start of a two-byte sequence when a second
     * byte is actually present.  Otherwise a truncated sequence at the end
     * of the input would consume the terminating NUL and the loop would
     * keep reading past the end of the string. */
    if ((lead & 0xe0) == 0xc0 && *src) {
      unsigned char ch2 = (unsigned char)*src++;
      *aux = (unsigned char)(((lead & 0x1f) << 6) | (ch2 & 0x3f));
    } else {
      *aux = lead;
    }
    aux++;
  }

  return ret;
}
e88b15cbf2de [/trunk] Changeset 40 by mikael
mikael
parents:
diff changeset
32
e88b15cbf2de [/trunk] Changeset 40 by mikael
mikael
parents:
diff changeset
33
e88b15cbf2de [/trunk] Changeset 40 by mikael
mikael
parents:
diff changeset
/* Desc: convert 8-bit string (ISO-8859-1 / Latin-1 range) -> UTF8
 *
 * In  : 8-bit string (NUL-terminated); may be NULL
 * Out : newly allocated UTF8 string, or NULL if src is NULL or the
 *       allocation fails
 *
 * Bytes below 0x80 are copied unchanged; every other byte is emitted as a
 * two-byte UTF8 sequence.
 *
 * Note: it is up to the caller to free the returned string
 */
char *utf8_encode(const char *src)
{
  char *ret;
  unsigned char *aux;

  if (!src) {
    return NULL;
  }

  /* Worst case every input byte expands to two output bytes.  Letting
   * calloc() do the multiplication keeps the size computation
   * overflow-checked, and the zero-filled buffer provides the terminating
   * NUL. */
  ret = calloc(2, strlen(src) + 1);
  if (!ret) {
    return NULL;
  }
  aux = (unsigned char*)ret;

  while (*src) {
    unsigned char ch = (unsigned char)*src++;
    if (ch < 0x80U) {
      *aux++ = ch;
    } else {
      /* ch is at most 0xFF, so a two-byte sequence is always enough. */
      *aux++ = (unsigned char)(0xc0 | (ch >> 6));
      *aux++ = (unsigned char)(0x80 | (ch & 0x3f));
    }
  }

  return ret;
}